Diffstat (limited to 'net')
-rw-r--r--  net/802/fc.c | 2
-rw-r--r--  net/802/fddi.c | 12
-rw-r--r--  net/802/garp.c | 18
-rw-r--r--  net/802/hippi.c | 2
-rw-r--r--  net/802/stp.c | 4
-rw-r--r--  net/802/tr.c | 2
-rw-r--r--  net/8021q/vlan.c | 93
-rw-r--r--  net/8021q/vlan.h | 17
-rw-r--r--  net/8021q/vlan_core.c | 121
-rw-r--r--  net/8021q/vlan_dev.c | 10
-rw-r--r--  net/9p/client.c | 233
-rw-r--r--  net/9p/protocol.c | 5
-rw-r--r--  net/9p/trans_fd.c | 2
-rw-r--r--  net/9p/trans_virtio.c | 76
-rw-r--r--  net/atm/clip.c | 4
-rw-r--r--  net/atm/common.c | 2
-rw-r--r--  net/atm/lec.c | 1
-rw-r--r--  net/atm/proc.c | 1
-rw-r--r--  net/ax25/Kconfig | 8
-rw-r--r--  net/ax25/af_ax25.c | 4
-rw-r--r--  net/ax25/ax25_route.c | 4
-rw-r--r--  net/bluetooth/af_bluetooth.c | 114
-rw-r--r--  net/bluetooth/cmtp/core.c | 6
-rw-r--r--  net/bluetooth/hci_core.c | 1
-rw-r--r--  net/bluetooth/hci_event.c | 6
-rw-r--r--  net/bluetooth/hci_sysfs.c | 21
-rw-r--r--  net/bluetooth/hidp/Kconfig | 2
-rw-r--r--  net/bluetooth/hidp/core.c | 8
-rw-r--r--  net/bluetooth/l2cap.c | 68
-rw-r--r--  net/bluetooth/lib.c | 4
-rw-r--r--  net/bluetooth/rfcomm/core.c | 56
-rw-r--r--  net/bluetooth/rfcomm/sock.c | 104
-rw-r--r--  net/bluetooth/rfcomm/tty.c | 8
-rw-r--r--  net/bridge/br_device.c | 8
-rw-r--r--  net/bridge/br_if.c | 29
-rw-r--r--  net/bridge/br_input.c | 4
-rw-r--r--  net/bridge/br_netfilter.c | 134
-rw-r--r--  net/bridge/netfilter/ebt_vlan.c | 25
-rw-r--r--  net/bridge/netfilter/ebtables.c | 15
-rw-r--r--  net/caif/caif_config_util.c | 13
-rw-r--r--  net/caif/caif_dev.c | 26
-rw-r--r--  net/caif/caif_socket.c | 72
-rw-r--r--  net/caif/cfcnfg.c | 66
-rw-r--r--  net/caif/cfctrl.c | 58
-rw-r--r--  net/caif/cfdbgl.c | 18
-rw-r--r--  net/caif/cfdgml.c | 11
-rw-r--r--  net/caif/cffrml.c | 14
-rw-r--r--  net/caif/cfmuxl.c | 14
-rw-r--r--  net/caif/cfpkt_skbuff.c | 48
-rw-r--r--  net/caif/cfrfml.c | 14
-rw-r--r--  net/caif/cfserl.c | 4
-rw-r--r--  net/caif/cfsrvl.c | 17
-rw-r--r--  net/caif/cfutill.c | 12
-rw-r--r--  net/caif/cfveil.c | 11
-rw-r--r--  net/caif/cfvidl.c | 6
-rw-r--r--  net/caif/chnl_net.c | 47
-rw-r--r--  net/can/bcm.c | 2
-rw-r--r--  net/can/raw.c | 37
-rw-r--r--  net/compat.c | 10
-rw-r--r--  net/core/datagram.c | 5
-rw-r--r--  net/core/dev.c | 615
-rw-r--r--  net/core/dst.c | 40
-rw-r--r--  net/core/ethtool.c | 91
-rw-r--r--  net/core/fib_rules.c | 37
-rw-r--r--  net/core/filter.c | 78
-rw-r--r--  net/core/flow.c | 82
-rw-r--r--  net/core/gen_estimator.c | 4
-rw-r--r--  net/core/iovec.c | 26
-rw-r--r--  net/core/neighbour.c | 486
-rw-r--r--  net/core/net-sysfs.c | 59
-rw-r--r--  net/core/net-sysfs.h | 4
-rw-r--r--  net/core/net_namespace.c | 4
-rw-r--r--  net/core/netpoll.c | 6
-rw-r--r--  net/core/pktgen.c | 53
-rw-r--r--  net/core/rtnetlink.c | 48
-rw-r--r--  net/core/skbuff.c | 105
-rw-r--r--  net/core/sock.c | 20
-rw-r--r--  net/core/sysctl_net_core.c | 3
-rw-r--r--  net/core/utils.c | 15
-rw-r--r--  net/dccp/ccid.h | 86
-rw-r--r--  net/dccp/ccids/Kconfig | 31
-rw-r--r--  net/dccp/ccids/ccid2.c | 310
-rw-r--r--  net/dccp/ccids/ccid2.h | 40
-rw-r--r--  net/dccp/ccids/ccid3.c | 268
-rw-r--r--  net/dccp/ccids/ccid3.h | 51
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c | 2
-rw-r--r--  net/dccp/ccids/lib/packet_history.c | 39
-rw-r--r--  net/dccp/ccids/lib/packet_history.h | 22
-rw-r--r--  net/dccp/ccids/lib/tfrc.h | 1
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c | 14
-rw-r--r--  net/dccp/dccp.h | 51
-rw-r--r--  net/dccp/feat.c | 10
-rw-r--r--  net/dccp/feat.h | 1
-rw-r--r--  net/dccp/input.c | 20
-rw-r--r--  net/dccp/ipv4.c | 10
-rw-r--r--  net/dccp/ipv6.c | 10
-rw-r--r--  net/dccp/minisocks.c | 30
-rw-r--r--  net/dccp/options.c | 31
-rw-r--r--  net/dccp/output.c | 227
-rw-r--r--  net/dccp/probe.c | 1
-rw-r--r--  net/dccp/proto.c | 71
-rw-r--r--  net/dccp/timer.c | 27
-rw-r--r--  net/decnet/af_decnet.c | 2
-rw-r--r--  net/decnet/dn_neigh.c | 13
-rw-r--r--  net/decnet/dn_nsp_out.c | 8
-rw-r--r--  net/decnet/dn_route.c | 3
-rw-r--r--  net/decnet/sysctl_net_decnet.c | 4
-rw-r--r--  net/econet/af_econet.c | 6
-rw-r--r--  net/ethernet/eth.c | 8
-rw-r--r--  net/ipv4/Kconfig | 12
-rw-r--r--  net/ipv4/Makefile | 1
-rw-r--r--  net/ipv4/af_inet.c | 8
-rw-r--r--  net/ipv4/arp.c | 245
-rw-r--r--  net/ipv4/cipso_ipv4.c | 2
-rw-r--r--  net/ipv4/datagram.c | 2
-rw-r--r--  net/ipv4/devinet.c | 11
-rw-r--r--  net/ipv4/fib_frontend.c | 194
-rw-r--r--  net/ipv4/fib_hash.c | 329
-rw-r--r--  net/ipv4/fib_lookup.h | 16
-rw-r--r--  net/ipv4/fib_rules.c | 13
-rw-r--r--  net/ipv4/fib_semantics.c | 297
-rw-r--r--  net/ipv4/fib_trie.c | 91
-rw-r--r--  net/ipv4/gre.c | 152
-rw-r--r--  net/ipv4/icmp.c | 4
-rw-r--r--  net/ipv4/igmp.c | 26
-rw-r--r--  net/ipv4/inet_diag.c | 29
-rw-r--r--  net/ipv4/inet_hashtables.c | 28
-rw-r--r--  net/ipv4/inetpeer.c | 138
-rw-r--r--  net/ipv4/ip_fragment.c | 6
-rw-r--r--  net/ipv4/ip_gre.c | 242
-rw-r--r--  net/ipv4/ip_options.c | 3
-rw-r--r--  net/ipv4/ip_output.c | 24
-rw-r--r--  net/ipv4/ip_sockglue.c | 10
-rw-r--r--  net/ipv4/ipip.c | 213
-rw-r--r--  net/ipv4/ipmr.c | 428
-rw-r--r--  net/ipv4/netfilter/Kconfig | 6
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 65
-rw-r--r--  net/ipv4/netfilter/arpt_mangle.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 85
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 31
-rw-r--r--  net/ipv4/netfilter/ipt_LOG.c | 145
-rw-r--r--  net/ipv4/netfilter/nf_nat_amanda.c | 9
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c | 87
-rw-r--r--  net/ipv4/netfilter/nf_nat_ftp.c | 9
-rw-r--r--  net/ipv4/netfilter/nf_nat_h323.c | 53
-rw-r--r--  net/ipv4/netfilter/nf_nat_helper.c | 76
-rw-r--r--  net/ipv4/netfilter/nf_nat_irc.c | 9
-rw-r--r--  net/ipv4/netfilter/nf_nat_rule.c | 17
-rw-r--r--  net/ipv4/netfilter/nf_nat_sip.c | 27
-rw-r--r--  net/ipv4/proc.c | 8
-rw-r--r--  net/ipv4/protocol.c | 33
-rw-r--r--  net/ipv4/raw.c | 2
-rw-r--r--  net/ipv4/route.c | 265
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 5
-rw-r--r--  net/ipv4/tcp.c | 17
-rw-r--r--  net/ipv4/tcp_illinois.c | 2
-rw-r--r--  net/ipv4/tcp_input.c | 70
-rw-r--r--  net/ipv4/tcp_ipv4.c | 20
-rw-r--r--  net/ipv4/tcp_minisocks.c | 2
-rw-r--r--  net/ipv4/tcp_output.c | 31
-rw-r--r--  net/ipv4/tcp_probe.c | 1
-rw-r--r--  net/ipv4/tcp_timer.c | 50
-rw-r--r--  net/ipv4/tcp_veno.c | 2
-rw-r--r--  net/ipv4/tcp_westwood.c | 2
-rw-r--r--  net/ipv4/tunnel4.c | 44
-rw-r--r--  net/ipv4/udp.c | 10
-rw-r--r--  net/ipv4/xfrm4_policy.c | 4
-rw-r--r--  net/ipv4/xfrm4_tunnel.c | 4
-rw-r--r--  net/ipv6/addrconf.c | 48
-rw-r--r--  net/ipv6/addrlabel.c | 5
-rw-r--r--  net/ipv6/af_inet6.c | 9
-rw-r--r--  net/ipv6/datagram.c | 19
-rw-r--r--  net/ipv6/exthdrs_core.c | 4
-rw-r--r--  net/ipv6/fib6_rules.c | 3
-rw-r--r--  net/ipv6/ip6_fib.c | 9
-rw-r--r--  net/ipv6/ip6_output.c | 6
-rw-r--r--  net/ipv6/ip6_tunnel.c | 159
-rw-r--r--  net/ipv6/ip6mr.c | 1
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 27
-rw-r--r--  net/ipv6/ndisc.c | 36
-rw-r--r--  net/ipv6/netfilter/Kconfig | 9
-rw-r--r--  net/ipv6/netfilter/Makefile | 8
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 99
-rw-r--r--  net/ipv6/netfilter/ip6t_LOG.c | 157
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 78
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 21
-rw-r--r--  net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 131
-rw-r--r--  net/ipv6/proc.c | 4
-rw-r--r--  net/ipv6/protocol.c | 34
-rw-r--r--  net/ipv6/raw.c | 14
-rw-r--r--  net/ipv6/reassembly.c | 4
-rw-r--r--  net/ipv6/route.c | 58
-rw-r--r--  net/ipv6/sit.c | 166
-rw-r--r--  net/ipv6/tcp_ipv6.c | 14
-rw-r--r--  net/ipv6/tunnel6.c | 37
-rw-r--r--  net/ipv6/udp.c | 18
-rw-r--r--  net/ipv6/xfrm6_policy.c | 10
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 8
-rw-r--r--  net/ipx/Kconfig | 1
-rw-r--r--  net/irda/af_irda.c | 380
-rw-r--r--  net/irda/discovery.c | 2
-rw-r--r--  net/irda/ircomm/ircomm_tty.c | 4
-rw-r--r--  net/irda/iriap.c | 3
-rw-r--r--  net/irda/irlan/irlan_eth.c | 32
-rw-r--r--  net/irda/irlan/irlan_event.c | 2
-rw-r--r--  net/irda/irlmp.c | 2
-rw-r--r--  net/irda/irlmp_frame.c | 2
-rw-r--r--  net/irda/irnet/irnet.h | 2
-rw-r--r--  net/irda/irnet/irnet_irda.c | 22
-rw-r--r--  net/irda/irnet/irnet_ppp.c | 71
-rw-r--r--  net/irda/irnet/irnet_ppp.h | 3
-rw-r--r--  net/irda/parameters.c | 4
-rw-r--r--  net/iucv/iucv.c | 3
-rw-r--r--  net/key/af_key.c | 4
-rw-r--r--  net/l2tp/l2tp_core.c | 53
-rw-r--r--  net/l2tp/l2tp_core.h | 33
-rw-r--r--  net/l2tp/l2tp_debugfs.c | 2
-rw-r--r--  net/l2tp/l2tp_eth.c | 1
-rw-r--r--  net/l2tp/l2tp_ip.c | 6
-rw-r--r--  net/l2tp/l2tp_ppp.c | 2
-rw-r--r--  net/mac80211/aes_ccm.c | 6
-rw-r--r--  net/mac80211/aes_cmac.c | 6
-rw-r--r--  net/mac80211/agg-rx.c | 30
-rw-r--r--  net/mac80211/agg-tx.c | 14
-rw-r--r--  net/mac80211/cfg.c | 244
-rw-r--r--  net/mac80211/chan.c | 2
-rw-r--r--  net/mac80211/debugfs.c | 28
-rw-r--r--  net/mac80211/debugfs_key.c | 63
-rw-r--r--  net/mac80211/debugfs_netdev.c | 4
-rw-r--r--  net/mac80211/debugfs_sta.c | 7
-rw-r--r--  net/mac80211/driver-ops.h | 14
-rw-r--r--  net/mac80211/driver-trace.h | 42
-rw-r--r--  net/mac80211/ht.c | 47
-rw-r--r--  net/mac80211/ibss.c | 78
-rw-r--r--  net/mac80211/ieee80211_i.h | 133
-rw-r--r--  net/mac80211/iface.c | 466
-rw-r--r--  net/mac80211/key.c | 168
-rw-r--r--  net/mac80211/key.h | 13
-rw-r--r--  net/mac80211/main.c | 207
-rw-r--r--  net/mac80211/mesh.h | 2
-rw-r--r--  net/mac80211/mesh_plink.c | 17
-rw-r--r--  net/mac80211/mlme.c | 173
-rw-r--r--  net/mac80211/offchannel.c | 26
-rw-r--r--  net/mac80211/pm.c | 2
-rw-r--r--  net/mac80211/rate.c | 15
-rw-r--r--  net/mac80211/rc80211_minstrel_debugfs.c | 1
-rw-r--r--  net/mac80211/rc80211_minstrel_ht.c | 7
-rw-r--r--  net/mac80211/rc80211_minstrel_ht_debugfs.c | 3
-rw-r--r--  net/mac80211/rc80211_pid_debugfs.c | 3
-rw-r--r--  net/mac80211/rx.c | 819
-rw-r--r--  net/mac80211/scan.c | 179
-rw-r--r--  net/mac80211/sta_info.c | 52
-rw-r--r--  net/mac80211/sta_info.h | 24
-rw-r--r--  net/mac80211/status.c | 14
-rw-r--r--  net/mac80211/tx.c | 73
-rw-r--r--  net/mac80211/util.c | 102
-rw-r--r--  net/mac80211/wep.c | 10
-rw-r--r--  net/mac80211/work.c | 39
-rw-r--r--  net/mac80211/wpa.c | 34
-rw-r--r--  net/netfilter/Kconfig | 2
-rw-r--r--  net/netfilter/core.c | 6
-rw-r--r--  net/netfilter/ipvs/Kconfig | 20
-rw-r--r--  net/netfilter/ipvs/Makefile | 10
-rw-r--r--  net/netfilter/ipvs/ip_vs_app.c | 6
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 286
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 819
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 392
-rw-r--r--  net/netfilter/ipvs/ip_vs_ftp.c | 194
-rw-r--r--  net/netfilter/ipvs/ip_vs_nfct.c | 292
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe.c | 147
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe_sip.c | 169
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto.c | 8
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 99
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c | 27
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_tcp.c | 52
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_udp.c | 51
-rw-r--r--  net/netfilter/ipvs/ip_vs_sched.c | 47
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c | 46
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 696
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 134
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 68
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 77
-rw-r--r--  net/netfilter/nf_conntrack_proto.c | 6
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_sip.c | 42
-rw-r--r--  net/netfilter/nf_tproxy_core.c | 35
-rw-r--r--  net/netfilter/x_tables.c | 12
-rw-r--r--  net/netfilter/xt_TPROXY.c | 368
-rw-r--r--  net/netfilter/xt_hashlimit.c | 15
-rw-r--r--  net/netfilter/xt_ipvs.c | 1
-rw-r--r--  net/netfilter/xt_recent.c | 1
-rw-r--r--  net/netfilter/xt_socket.c | 170
-rw-r--r--  net/netlink/af_netlink.c | 65
-rw-r--r--  net/netlink/genetlink.c | 14
-rw-r--r--  net/nonet.c | 1
-rw-r--r--  net/packet/af_packet.c | 11
-rw-r--r--  net/phonet/Kconfig | 12
-rw-r--r--  net/phonet/af_phonet.c | 17
-rw-r--r--  net/phonet/datagram.c | 13
-rw-r--r--  net/phonet/pep.c | 388
-rw-r--r--  net/phonet/pn_dev.c | 5
-rw-r--r--  net/phonet/socket.c | 289
-rw-r--r--  net/rds/af_rds.c | 26
-rw-r--r--  net/rds/bind.c | 82
-rw-r--r--  net/rds/cong.c | 8
-rw-r--r--  net/rds/connection.c | 159
-rw-r--r--  net/rds/ib.c | 200
-rw-r--r--  net/rds/ib.h | 104
-rw-r--r--  net/rds/ib_cm.c | 184
-rw-r--r--  net/rds/ib_rdma.c | 318
-rw-r--r--  net/rds/ib_recv.c | 549
-rw-r--r--  net/rds/ib_send.c | 682
-rw-r--r--  net/rds/ib_stats.c | 2
-rw-r--r--  net/rds/ib_sysctl.c | 19
-rw-r--r--  net/rds/info.c | 12
-rw-r--r--  net/rds/iw.c | 8
-rw-r--r--  net/rds/iw.h | 15
-rw-r--r--  net/rds/iw_cm.c | 14
-rw-r--r--  net/rds/iw_rdma.c | 8
-rw-r--r--  net/rds/iw_recv.c | 24
-rw-r--r--  net/rds/iw_send.c | 93
-rw-r--r--  net/rds/iw_sysctl.c | 6
-rw-r--r--  net/rds/loop.c | 35
-rw-r--r--  net/rds/message.c | 149
-rw-r--r--  net/rds/page.c | 8
-rw-r--r--  net/rds/rdma.c | 421
-rw-r--r--  net/rds/rdma.h | 85
-rw-r--r--  net/rds/rdma_transport.c | 44
-rw-r--r--  net/rds/rdma_transport.h | 4
-rw-r--r--  net/rds/rds.h | 192
-rw-r--r--  net/rds/recv.c | 12
-rw-r--r--  net/rds/send.c | 552
-rw-r--r--  net/rds/stats.c | 6
-rw-r--r--  net/rds/sysctl.c | 4
-rw-r--r--  net/rds/tcp.c | 18
-rw-r--r--  net/rds/tcp.h | 9
-rw-r--r--  net/rds/tcp_connect.c | 2
-rw-r--r--  net/rds/tcp_listen.c | 6
-rw-r--r--  net/rds/tcp_recv.c | 17
-rw-r--r--  net/rds/tcp_send.c | 68
-rw-r--r--  net/rds/threads.c | 69
-rw-r--r--  net/rds/transport.c | 19
-rw-r--r--  net/rds/xlist.h | 80
-rw-r--r--  net/rfkill/core.c | 1
-rw-r--r--  net/rfkill/input.c | 2
-rw-r--r--  net/rose/rose_link.c | 4
-rw-r--r--  net/sched/Kconfig | 10
-rw-r--r--  net/sched/Makefile | 1
-rw-r--r--  net/sched/act_csum.c | 595
-rw-r--r--  net/sched/act_ipt.c | 14
-rw-r--r--  net/sched/cls_basic.c | 4
-rw-r--r--  net/sched/cls_cgroup.c | 2
-rw-r--r--  net/sched/cls_flow.c | 74
-rw-r--r--  net/sched/em_meta.c | 6
-rw-r--r--  net/sched/em_text.c | 3
-rw-r--r--  net/sched/sch_api.c | 44
-rw-r--r--  net/sched/sch_atm.c | 5
-rw-r--r--  net/sched/sch_cbq.c | 12
-rw-r--r--  net/sched/sch_drr.c | 4
-rw-r--r--  net/sched/sch_dsmark.c | 6
-rw-r--r--  net/sched/sch_fifo.c | 3
-rw-r--r--  net/sched/sch_generic.c | 24
-rw-r--r--  net/sched/sch_hfsc.c | 8
-rw-r--r--  net/sched/sch_htb.c | 12
-rw-r--r--  net/sched/sch_mq.c | 2
-rw-r--r--  net/sched/sch_multiq.c | 3
-rw-r--r--  net/sched/sch_netem.c | 3
-rw-r--r--  net/sched/sch_prio.c | 2
-rw-r--r--  net/sched/sch_sfq.c | 33
-rw-r--r--  net/sched/sch_teql.c | 8
-rw-r--r--  net/sctp/associola.c | 2
-rw-r--r--  net/sctp/chunk.c | 2
-rw-r--r--  net/sctp/inqueue.c | 2
-rw-r--r--  net/sctp/ipv6.c | 4
-rw-r--r--  net/sctp/objcnt.c | 5
-rw-r--r--  net/sctp/output.c | 2
-rw-r--r--  net/sctp/outqueue.c | 34
-rw-r--r--  net/sctp/probe.c | 5
-rw-r--r--  net/sctp/protocol.c | 21
-rw-r--r--  net/sctp/sm_make_chunk.c | 2
-rw-r--r--  net/sctp/sm_sideeffect.c | 21
-rw-r--r--  net/sctp/sm_statefuns.c | 20
-rw-r--r--  net/sctp/sm_statetable.c | 42
-rw-r--r--  net/sctp/socket.c | 89
-rw-r--r--  net/sctp/sysctl.c | 4
-rw-r--r--  net/sctp/transport.c | 9
-rw-r--r--  net/socket.c | 58
-rw-r--r--  net/sunrpc/Kconfig | 19
-rw-r--r--  net/sunrpc/auth.c | 4
-rw-r--r--  net/sunrpc/auth_generic.c | 2
-rw-r--r--  net/sunrpc/auth_gss/Makefile | 5
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_generic_token.c | 44
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_mech.c | 247
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_seal.c | 186
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_token.c | 267
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_unseal.c | 127
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 51
-rw-r--r--  net/sunrpc/cache.c | 305
-rw-r--r--  net/sunrpc/clnt.c | 3
-rw-r--r--  net/sunrpc/netns.h | 19
-rw-r--r--  net/sunrpc/rpc_pipe.c | 49
-rw-r--r--  net/sunrpc/rpcb_clnt.c | 60
-rw-r--r--  net/sunrpc/sched.c | 4
-rw-r--r--  net/sunrpc/stats.c | 43
-rw-r--r--  net/sunrpc/sunrpc_syms.c | 58
-rw-r--r--  net/sunrpc/svc.c | 3
-rw-r--r--  net/sunrpc/svc_xprt.c | 59
-rw-r--r--  net/sunrpc/svcauth_unix.c | 194
-rw-r--r--  net/sunrpc/svcsock.c | 27
-rw-r--r--  net/sunrpc/xdr.c | 61
-rw-r--r--  net/sunrpc/xprt.c | 39
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma.c | 11
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 19
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c | 82
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 49
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 25
-rw-r--r--  net/sunrpc/xprtsock.c | 358
-rw-r--r--  net/tipc/addr.c | 7
-rw-r--r--  net/tipc/bcast.c | 51
-rw-r--r--  net/tipc/bcast.h | 3
-rw-r--r--  net/tipc/bearer.c | 42
-rw-r--r--  net/tipc/cluster.c | 21
-rw-r--r--  net/tipc/cluster.h | 2
-rw-r--r--  net/tipc/config.c | 148
-rw-r--r--  net/tipc/config.h | 6
-rw-r--r--  net/tipc/core.c | 38
-rw-r--r--  net/tipc/core.h | 9
-rw-r--r--  net/tipc/dbg.c | 17
-rw-r--r--  net/tipc/dbg.h | 3
-rw-r--r--  net/tipc/discover.c | 44
-rw-r--r--  net/tipc/discover.h | 5
-rw-r--r--  net/tipc/eth_media.c | 48
-rw-r--r--  net/tipc/link.c | 188
-rw-r--r--  net/tipc/link.h | 24
-rw-r--r--  net/tipc/msg.c | 2
-rw-r--r--  net/tipc/msg.h | 6
-rw-r--r--  net/tipc/name_distr.c | 2
-rw-r--r--  net/tipc/name_table.c | 67
-rw-r--r--  net/tipc/net.c | 10
-rw-r--r--  net/tipc/node.c | 73
-rw-r--r--  net/tipc/node.h | 3
-rw-r--r--  net/tipc/port.c | 295
-rw-r--r--  net/tipc/port.h | 4
-rw-r--r--  net/tipc/ref.c | 17
-rw-r--r--  net/tipc/ref.h | 1
-rw-r--r--  net/tipc/socket.c | 84
-rw-r--r--  net/tipc/subscr.c | 77
-rw-r--r--  net/tipc/subscr.h | 2
-rw-r--r--  net/tipc/zone.c | 11
-rw-r--r--  net/tipc/zone.h | 1
-rw-r--r--  net/unix/af_unix.c | 24
-rw-r--r--  net/wanrouter/wanmain.c | 4
-rw-r--r--  net/wireless/core.c | 66
-rw-r--r--  net/wireless/core.h | 34
-rw-r--r--  net/wireless/debugfs.c | 2
-rw-r--r--  net/wireless/ibss.c | 21
-rw-r--r--  net/wireless/mlme.c | 225
-rw-r--r--  net/wireless/nl80211.c | 2189
-rw-r--r--  net/wireless/nl80211.h | 14
-rw-r--r--  net/wireless/radiotap.c | 61
-rw-r--r--  net/wireless/reg.c | 24
-rw-r--r--  net/wireless/scan.c | 12
-rw-r--r--  net/wireless/sme.c | 11
-rw-r--r--  net/wireless/sysfs.c | 18
-rw-r--r--  net/wireless/util.c | 40
-rw-r--r--  net/wireless/wext-compat.c | 42
-rw-r--r--  net/wireless/wext-core.c | 2
-rw-r--r--  net/wireless/wext-sme.c | 2
-rw-r--r--  net/x25/Kconfig | 1
-rw-r--r--  net/x25/af_x25.c | 34
-rw-r--r--  net/x25/x25_facilities.c | 20
-rw-r--r--  net/x25/x25_in.c | 2
-rw-r--r--  net/xfrm/xfrm_policy.c | 7
477 files changed, 17757 insertions, 13100 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 34cf1ee014b8..1e49f2d4ea96 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -70,7 +70,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
 	if(daddr)
 	{
 		memcpy(fch->daddr,daddr,dev->addr_len);
-		return(hdr_len);
+		return hdr_len;
 	}
 	return -hdr_len;
 }
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 3ef0ab0a543a..94b3ad08f39a 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -82,10 +82,10 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
 	if (daddr != NULL)
 	{
 		memcpy(fddi->daddr, daddr, dev->addr_len);
-		return(hl);
+		return hl;
 	}
 
-	return(-hl);
+	return -hl;
 }
 
 
@@ -108,7 +108,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
 	{
 		printk("%s: Don't know how to resolve type %04X addresses.\n",
 		       skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
-		return(0);
+		return 0;
 	}
 }
 
@@ -162,7 +162,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 	/* Assume 802.2 SNAP frames, for now */
 
-	return(type);
+	return type;
 }
 
 EXPORT_SYMBOL(fddi_type_trans);
@@ -170,9 +170,9 @@ EXPORT_SYMBOL(fddi_type_trans);
 int fddi_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
-		return(-EINVAL);
+		return -EINVAL;
 	dev->mtu = new_mtu;
-	return(0);
+	return 0;
 }
 EXPORT_SYMBOL(fddi_change_mtu);
 
diff --git a/net/802/garp.c b/net/802/garp.c
index 941f2a324d3a..c1df2dad8c6b 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -346,8 +346,8 @@ int garp_request_join(const struct net_device *dev,
 		      const struct garp_application *appl,
 		      const void *data, u8 len, u8 type)
 {
-	struct garp_port *port = dev->garp_port;
-	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
+	struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
 	struct garp_attr *attr;
 
 	spin_lock_bh(&app->lock);
@@ -366,8 +366,8 @@ void garp_request_leave(const struct net_device *dev,
 			const struct garp_application *appl,
 			const void *data, u8 len, u8 type)
 {
-	struct garp_port *port = dev->garp_port;
-	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
+	struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
 	struct garp_attr *attr;
 
 	spin_lock_bh(&app->lock);
@@ -546,11 +546,11 @@ static int garp_init_port(struct net_device *dev)
 
 static void garp_release_port(struct net_device *dev)
 {
-	struct garp_port *port = dev->garp_port;
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
 	unsigned int i;
 
 	for (i = 0; i <= GARP_APPLICATION_MAX; i++) {
-		if (port->applicants[i])
+		if (rtnl_dereference(port->applicants[i]))
 			return;
 	}
 	rcu_assign_pointer(dev->garp_port, NULL);
@@ -565,7 +565,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
 
 	ASSERT_RTNL();
 
-	if (!dev->garp_port) {
+	if (!rtnl_dereference(dev->garp_port)) {
 		err = garp_init_port(dev);
 		if (err < 0)
 			goto err1;
@@ -601,8 +601,8 @@ EXPORT_SYMBOL_GPL(garp_init_applicant);
 
 void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl)
 {
-	struct garp_port *port = dev->garp_port;
-	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_port *port = rtnl_dereference(dev->garp_port);
+	struct garp_applicant *app = rtnl_dereference(port->applicants[appl->type]);
 
 	ASSERT_RTNL();
 
diff --git a/net/802/hippi.c b/net/802/hippi.c
index cd3e8e929529..91aca8780fd0 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -152,7 +152,7 @@ int hippi_change_mtu(struct net_device *dev, int new_mtu)
 	if ((new_mtu < 68) || (new_mtu > 65280))
 		return -EINVAL;
 	dev->mtu = new_mtu;
-	return(0);
+	return 0;
 }
 EXPORT_SYMBOL(hippi_change_mtu);
 
diff --git a/net/802/stp.c b/net/802/stp.c
index 53c8f77f0ccd..978c30b1b36b 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -21,8 +21,8 @@
 #define GARP_ADDR_MAX	0x2F
 #define GARP_ADDR_RANGE	(GARP_ADDR_MAX - GARP_ADDR_MIN)
 
-static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
-static const struct stp_proto *stp_proto __read_mostly;
+static const struct stp_proto __rcu *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
+static const struct stp_proto __rcu *stp_proto __read_mostly;
 
 static struct llc_sap *sap __read_mostly;
 static unsigned int sap_registered;
diff --git a/net/802/tr.c b/net/802/tr.c
index 1c6e596074df..5e20cf8a074b 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -145,7 +145,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev,
 	{
 		memcpy(trh->daddr,daddr,dev->addr_len);
 		tr_source_route(skb, trh, dev);
-		return(hdr_len);
+		return hdr_len;
 	}
 
 	return -hdr_len;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a2ad15250575..52077ca22072 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -44,9 +44,6 @@
 
 int vlan_net_id __read_mostly;
 
-/* Our listing of VLAN group(s) */
-static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
-
 const char vlan_fullname[] = "802.1Q VLAN Support";
 const char vlan_version[] = DRV_VERSION;
 static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
@@ -59,40 +56,6 @@ static struct packet_type vlan_packet_type __read_mostly = {
 
 /* End of global variables definitions. */
 
-static inline unsigned int vlan_grp_hashfn(unsigned int idx)
-{
-	return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
-}
-
-/* Must be invoked with RCU read lock (no preempt) */
-static struct vlan_group *__vlan_find_group(struct net_device *real_dev)
-{
-	struct vlan_group *grp;
-	struct hlist_node *n;
-	int hash = vlan_grp_hashfn(real_dev->ifindex);
-
-	hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
-		if (grp->real_dev == real_dev)
-			return grp;
-	}
-
-	return NULL;
-}
-
-/* Find the protocol handler. Assumes VID < VLAN_VID_MASK.
- *
- * Must be invoked with RCU read lock (no preempt)
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id)
-{
-	struct vlan_group *grp = __vlan_find_group(real_dev);
-
-	if (grp)
-		return vlan_group_get_device(grp, vlan_id);
-
-	return NULL;
-}
-
 static void vlan_group_free(struct vlan_group *grp)
 {
 	int i;
@@ -111,8 +74,6 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
 		return NULL;
 
 	grp->real_dev = real_dev;
-	hlist_add_head_rcu(&grp->hlist,
-			&vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
 	return grp;
 }
 
@@ -151,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 
 	ASSERT_RTNL();
 
-	grp = __vlan_find_group(real_dev);
+	grp = rtnl_dereference(real_dev->vlgrp);
 	BUG_ON(!grp);
 
 	/* Take it out of our own structures, but be sure to interlock with
@@ -173,11 +134,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (grp->nr_vlans == 0) {
 		vlan_gvrp_uninit_applicant(real_dev);
 
-		if (real_dev->features & NETIF_F_HW_VLAN_RX)
+		rcu_assign_pointer(real_dev->vlgrp, NULL);
+		if (ops->ndo_vlan_rx_register)
 			ops->ndo_vlan_rx_register(real_dev, NULL);
 
-		hlist_del_rcu(&grp->hlist);
-
 		/* Free the group, after all cpu's are done. */
 		call_rcu(&grp->rcu, vlan_rcu_free);
 	}
@@ -196,18 +156,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !ops->ndo_vlan_rx_register) {
-		pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
-		return -EOPNOTSUPP;
-	}
-
 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	    (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) {
 		pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
 		return -EOPNOTSUPP;
 	}
 
-	if (__find_vlan_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -222,7 +177,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
 
-	grp = __vlan_find_group(real_dev);
+	grp = rtnl_dereference(real_dev->vlgrp);
 	if (!grp) {
 		ngrp = grp = vlan_group_alloc(real_dev);
 		if (!grp)
@@ -252,8 +207,11 @@ int register_vlan_dev(struct net_device *dev)
 	vlan_group_set_device(grp, vlan_id, dev);
 	grp->nr_vlans++;
 
-	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
-		ops->ndo_vlan_rx_register(real_dev, ngrp);
+	if (ngrp) {
+		if (ops->ndo_vlan_rx_register)
+			ops->ndo_vlan_rx_register(real_dev, ngrp);
+		rcu_assign_pointer(real_dev->vlgrp, ngrp);
+	}
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
 
@@ -264,7 +222,6 @@ out_uninit_applicant:
 	vlan_gvrp_uninit_applicant(real_dev);
 out_free_group:
 	if (ngrp) {
-		hlist_del_rcu(&ngrp->hlist);
 		/* Free the group, after all cpu's are done. */
 		call_rcu(&ngrp->rcu, vlan_rcu_free);
 	}
@@ -321,7 +278,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (new_dev == NULL)
 		return -ENOBUFS;
 
-	new_dev->real_num_tx_queues = real_dev->real_num_tx_queues;
+	netif_copy_real_num_queues(new_dev, real_dev);
 	dev_net_set(new_dev, net);
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
@@ -428,7 +385,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
 	}
 
-	grp = __vlan_find_group(dev);
+	grp = rtnl_dereference(dev->vlgrp);
 	if (!grp)
 		goto out;
 
@@ -439,7 +396,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -450,7 +407,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -464,7 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -478,7 +435,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -490,7 +447,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too. */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -508,7 +465,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too. */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -525,10 +482,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_UNREGISTER:
+		/* twiddle thumbs on netns device moves */
+		if (dev->reg_state != NETREG_UNREGISTERING)
+			break;
+
 		/* Delete all VLANs for this dev. */
 		grp->killall = 1;
 
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -536,7 +497,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			/* unregistration of last vlan destroys group, abort
 			 * afterwards */
 			if (grp->nr_vlans == 1)
-				i = VLAN_GROUP_ARRAY_LEN;
+				i = VLAN_N_VID;
 
 			unregister_vlan_dev(vlandev, &list);
 		}
@@ -742,8 +703,6 @@ err0:
 
 static void __exit vlan_cleanup_module(void)
 {
-	unsigned int i;
-
 	vlan_ioctl_set(NULL);
 	vlan_netlink_fini();
 
@@ -751,10 +710,6 @@ static void __exit vlan_cleanup_module(void)
 
 	dev_remove_pack(&vlan_packet_type);
 
-	/* This table must be empty if there are no module references left. */
-	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
-		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
-
 	unregister_pernet_subsys(&vlan_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 8d9503ad01da..db01b3181fdc 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -72,23 +72,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 	return netdev_priv(dev);
 }
 
-#define VLAN_GRP_HASH_SHIFT	5
-#define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
-#define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
-
-/* Find a VLAN device by the MAC address of its Ethernet device, and
- * it's VLAN ID.  The default configuration is to have VLAN's scope
- * to be box-wide, so the MAC will be ignored.  The mac will only be
- * looked at if we are configured to have a separate set of VLANs per
- * each MAC addressable interface.  Note that this latter option does
- * NOT follow the spec for VLANs, but may be useful for doing very
- * large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs.
- *
- * Must be invoked with rcu_read_lock (ie preempt disabled)
- * or with RTNL.
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id);
-
 /* found in vlan_dev.c */
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 0eb96f7e44be..69b2f79800a5 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,53 +4,29 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
-/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
-int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-		      u16 vlan_tci, int polling)
+bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 {
+	struct sk_buff *skb = *skbp;
+	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
-	u16 vlan_id;
-
-	if (netpoll_rx(skb))
-		return NET_RX_DROP;
-
-	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		skb->deliver_no_wcard = 1;
+	struct vlan_rx_stats *rx_stats;
 
-	skb->skb_iif = skb->dev->ifindex;
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	vlan_id = vlan_tci & VLAN_VID_MASK;
-	vlan_dev = vlan_group_get_device(grp, vlan_id);
-
-	if (vlan_dev)
-		skb->dev = vlan_dev;
-	else if (vlan_id) {
-		if (!(skb->dev->flags & IFF_PROMISC))
-			goto drop;
-		skb->pkt_type = PACKET_OTHERHOST;
+	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	if (!vlan_dev) {
+		if (vlan_id)
+			skb->pkt_type = PACKET_OTHERHOST;
+		return false;
 	}
 
-	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+	skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return false;
 
-drop:
-	dev_kfree_skb_any(skb);
-	return NET_RX_DROP;
-}
-EXPORT_SYMBOL(__vlan_hwaccel_rx);
-
-int vlan_hwaccel_do_receive(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	struct vlan_rx_stats *rx_stats;
-
-	skb->dev = vlan_dev_info(dev)->real_dev;
-	netif_nit_deliver(skb);
-
-	skb->dev = dev;
-	skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
+	skb->dev = vlan_dev;
+	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
@@ -67,12 +43,13 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
 		 * This allows the VLAN to have a different MAC than the
 		 * underlying device, and still route correctly. */
 		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					dev->dev_addr))
+					vlan_dev->dev_addr))
 			skb->pkt_type = PACKET_HOST;
 		break;
 	}
 	u64_stats_update_end(&rx_stats->syncp);
-	return 0;
+
+	return true;
 }
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -87,71 +64,27 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
 }
 EXPORT_SYMBOL(vlan_dev_vlan_id);
 
-static gro_result_t
-vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
-		unsigned int vlan_tci, struct sk_buff *skb)
+/* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
+int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
+		      u16 vlan_tci, int polling)
 {
-	struct sk_buff *p;
-	struct net_device *vlan_dev;
-	u16 vlan_id;
-
-	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		skb->deliver_no_wcard = 1;
-
-	skb->skb_iif = skb->dev->ifindex;
 	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	vlan_id = vlan_tci & VLAN_VID_MASK;
-	vlan_dev = vlan_group_get_device(grp, vlan_id);
-
-	if (vlan_dev)
-		skb->dev = vlan_dev;
-	else if (vlan_id) {
-		if (!(skb->dev->flags & IFF_PROMISC))
-			goto drop;
-		skb->pkt_type = PACKET_OTHERHOST;
-	}
-
-	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow =
-			p->dev == skb->dev && !compare_ether_header(
-				skb_mac_header(p), skb_gro_mac_header(skb));
-		NAPI_GRO_CB(p)->flush = 0;
-	}
-
-	return dev_gro_receive(napi, skb);
-
-drop:
-	return GRO_DROP;
+	return polling ? netif_receive_skb(skb) : netif_rx(skb);
 }
+EXPORT_SYMBOL(__vlan_hwaccel_rx);
 
 gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 			      unsigned int vlan_tci, struct sk_buff *skb)
 {
-	if (netpoll_rx_on(skb))
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
-			? GRO_DROP : GRO_NORMAL;
-
-	skb_gro_reset_offset(skb);
-
-	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
+	__vlan_hwaccel_put_tag(skb, vlan_tci);
+	return napi_gro_receive(napi, skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
 
 gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 			    unsigned int vlan_tci)
 {
-	struct sk_buff *skb = napi_frags_skb(napi);
-
-	if (!skb)
-		return GRO_DROP;
-
-	if (netpoll_rx_on(skb)) {
-		skb->protocol = eth_type_trans(skb, skb->dev);
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
-			? GRO_DROP : GRO_NORMAL;
-	}
-
-	return napi_frags_finish(napi, skb,
-				 vlan_gro_common(napi, grp, vlan_tci, skb));
+	__vlan_hwaccel_put_tag(napi->skb, vlan_tci);
+	return napi_gro_frags(napi);
 }
 EXPORT_SYMBOL(vlan_gro_frags);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3bccdd12a264..14e3d1fa07a0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -158,7 +158,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	vlan_id = vlan_tci & VLAN_VID_MASK;
 
 	rcu_read_lock();
-	vlan_dev = __find_vlan_dev(dev, vlan_id);
+	vlan_dev = vlan_find_dev(dev, vlan_id);
 
 	/* If the VLAN device is defined, we use it.
 	 * If not, and the VID is 0, it is a 802.1p packet (not
@@ -177,8 +177,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	} else {
 		skb->dev = vlan_dev;
 
-		rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
-				       smp_processor_id());
+		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+
 		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->rx_packets++;
 		rx_stats->rx_bytes += skb->len;
@@ -226,12 +226,14 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	netif_rx(skb);
+
 	rcu_read_unlock();
 	return NET_RX_SUCCESS;
 
 err_unlock:
 	rcu_read_unlock();
 err_free:
+	atomic_long_inc(&dev->rx_dropped);
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
@@ -843,7 +845,7 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
 		accum.rx_packets += rxpackets;
 		accum.rx_bytes   += rxbytes;
 		accum.rx_multicast += rxmulticast;
-		/* rx_errors is an ulong, not protected by syncp */
+		/* rx_errors is ulong, not protected by syncp */
 		accum.rx_errors  += p->rx_errors;
 	}
 	stats->rx_packets = accum.rx_packets;
diff --git a/net/9p/client.c b/net/9p/client.c
index 9eb72505308f..a848bca9fbff 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -61,13 +61,13 @@ static const match_table_t tokens = {
 
 inline int p9_is_proto_dotl(struct p9_client *clnt)
 {
-	return (clnt->proto_version == p9_proto_2000L);
+	return clnt->proto_version == p9_proto_2000L;
 }
 EXPORT_SYMBOL(p9_is_proto_dotl);
 
 inline int p9_is_proto_dotu(struct p9_client *clnt)
 {
-	return (clnt->proto_version == p9_proto_2000u);
+	return clnt->proto_version == p9_proto_2000u;
 }
 EXPORT_SYMBOL(p9_is_proto_dotu);
 
@@ -450,32 +450,43 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
 		return err;
 	}
 
-	if (type == P9_RERROR) {
+	if (type == P9_RERROR || type == P9_RLERROR) {
 		int ecode;
-		char *ename;
 
-		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
-							&ename, &ecode);
-		if (err) {
-			P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
-									err);
-			return err;
-		}
+		if (!p9_is_proto_dotl(c)) {
+			char *ename;
 
-		if (p9_is_proto_dotu(c) ||
-			p9_is_proto_dotl(c))
-			err = -ecode;
+			err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+							&ename, &ecode);
+			if (err)
+				goto out_err;
+
+			if (p9_is_proto_dotu(c))
+				err = -ecode;
 
-		if (!err || !IS_ERR_VALUE(err))
-			err = p9_errstr2errno(ename, strlen(ename));
+			if (!err || !IS_ERR_VALUE(err)) {
+				err = p9_errstr2errno(ename, strlen(ename));
+
+				P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+
+				kfree(ename);
+			}
+		} else {
+			err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+			err = -ecode;
 
-		P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+			P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+		}
 
-		kfree(ename);
 	} else
 		err = 0;
 
 	return err;
+
+out_err:
+	P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
+
+	return err;
 }
 
 /**
@@ -568,11 +579,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	va_start(ap, fmt);
 	err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
 	va_end(ap);
+	if (err)
+		goto reterr;
 	p9pdu_finalize(req->tc);
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
-		c->status = Disconnected;
+		if (err != -ERESTARTSYS)
+			c->status = Disconnected;
 		goto reterr;
 	}
 
@@ -671,7 +685,7 @@ static void p9_fid_destroy(struct p9_fid *fid)
 	kfree(fid);
 }
 
-int p9_client_version(struct p9_client *c)
+static int p9_client_version(struct p9_client *c)
 {
 	int err = 0;
 	struct p9_req_t *req;
@@ -730,7 +744,6 @@ error:
 
 	return err;
 }
-EXPORT_SYMBOL(p9_client_version);
 
 struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
@@ -887,54 +900,6 @@ error:
 }
 EXPORT_SYMBOL(p9_client_attach);
 
-struct p9_fid *
-p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
-{
-	int err;
-	struct p9_req_t *req;
-	struct p9_qid qid;
-	struct p9_fid *afid;
-
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname);
-	err = 0;
-
-	afid = p9_fid_create(clnt);
-	if (IS_ERR(afid)) {
-		err = PTR_ERR(afid);
-		afid = NULL;
-		goto error;
-	}
-
-	req = p9_client_rpc(clnt, P9_TAUTH, "dss?d",
-			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto error;
-	}
-
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
-	if (err) {
-		p9pdu_dump(1, req->rc);
-		p9_free_req(clnt, req);
-		goto error;
-	}
-
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
-					qid.type,
-					(unsigned long long)qid.path,
-					qid.version);
-
-	memmove(&afid->qid, &qid, sizeof(struct p9_qid));
-	p9_free_req(clnt, req);
-	return afid;
-
-error:
-	if (afid)
-		p9_fid_destroy(afid);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL(p9_client_auth);
-
 struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 								int clone)
 {
@@ -1200,12 +1165,44 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
 }
 EXPORT_SYMBOL(p9_client_link);
 
+int p9_client_fsync(struct p9_fid *fid, int datasync)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
+			fid->fid, datasync);
+	err = 0;
+	clnt = fid->clnt;
+
+	req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);
+
+	p9_free_req(clnt, req);
+
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_fsync);
+
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
 	struct p9_client *clnt;
 	struct p9_req_t *req;
 
+	if (!fid) {
+		P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n");
+		dump_stack();
+		return 0;
+	}
+
 	P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
 	err = 0;
 	clnt = fid->clnt;
@@ -1289,16 +1286,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
 
 	if (data) {
 		memmove(data, dataptr, count);
-	}
-
-	if (udata) {
+	} else {
 		err = copy_to_user(udata, dataptr, count);
 		if (err) {
 			err = -EFAULT;
 			goto free_and_error;
 		}
 	}
-
 	p9_free_req(clnt, req);
 	return count;
 
@@ -1810,3 +1804,96 @@ error:
 
 }
 EXPORT_SYMBOL(p9_client_mkdir_dotl);
+
+int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	err = 0;
+	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "
+			"start %lld length %lld proc_id %d client_id %s\n",
+			fid->fid, flock->type, flock->flags, flock->start,
+			flock->length, flock->proc_id, flock->client_id);
+
+	req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type,
+				flock->flags, flock->start, flock->length,
+				flock->proc_id, flock->client_id);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
+error:
+	p9_free_req(clnt, req);
+	return err;
+
+}
+EXPORT_SYMBOL(p9_client_lock_dotl);
+
+int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	err = 0;
+	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "
+		"length %lld proc_id %d client_id %s\n", fid->fid, glock->type,
+		glock->start, glock->length, glock->proc_id, glock->client_id);
+
+	req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type,
+		glock->start, glock->length, glock->proc_id, glock->client_id);
+
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
+			&glock->start, &glock->length, &glock->proc_id,
+			&glock->client_id);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
+		"proc_id %d client_id %s\n", glock->type, glock->start,
+		glock->length, glock->proc_id, glock->client_id);
+error:
+	p9_free_req(clnt, req);
+	return err;
+}
+EXPORT_SYMBOL(p9_client_getlock_dotl);
+
+int p9_client_readlink(struct p9_fid *fid, char **target)
+{
+	int err;
+	struct p9_client *clnt;
+	struct p9_req_t *req;
+
+	err = 0;
+	clnt = fid->clnt;
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
+
+	req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		goto error;
+	}
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
+error:
+	p9_free_req(clnt, req);
+	return err;
+}
+EXPORT_SYMBOL(p9_client_readlink);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 3acd3afb20c8..45c15f491401 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -122,9 +122,8 @@ static size_t
 pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 {
 	size_t len = MIN(pdu->capacity - pdu->size, size);
-	int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
-	if (err)
-		printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+	if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
+		len = 0;
 
 	pdu->size += len;
 	return size - len;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c85109d809ca..078eb162d9bf 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -222,7 +222,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	}
 }
 
-static unsigned int
+static int
 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 {
 	int ret, n;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index b88515936e4b..c8f3f72ab20e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -75,6 +75,8 @@ struct virtio_chan {
 	struct p9_client *client;
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
+	int ring_bufs_avail;
+	wait_queue_head_t *vc_wq;
 
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
@@ -134,16 +136,30 @@ static void req_done(struct virtqueue *vq)
 	struct p9_fcall *rc;
 	unsigned int len;
 	struct p9_req_t *req;
+	unsigned long flags;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
 
-	while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) {
-		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
-		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
-		req = p9_tag_lookup(chan->client, rc->tag);
-		req->status = REQ_STATUS_RCVD;
-		p9_client_cb(chan->client, req);
-	}
+	do {
+		spin_lock_irqsave(&chan->lock, flags);
+		rc = virtqueue_get_buf(chan->vq, &len);
+
+		if (rc != NULL) {
+			if (!chan->ring_bufs_avail) {
+				chan->ring_bufs_avail = 1;
+				wake_up(chan->vc_wq);
+			}
+			spin_unlock_irqrestore(&chan->lock, flags);
+			P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+			P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
+					rc->tag);
+			req = p9_tag_lookup(chan->client, rc->tag);
+			req->status = REQ_STATUS_RCVD;
+			p9_client_cb(chan->client, req);
+		} else {
+			spin_unlock_irqrestore(&chan->lock, flags);
+		}
+	} while (rc != NULL);
 }
 
 /**
@@ -199,23 +215,43 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 	int in, out;
 	struct virtio_chan *chan = client->trans;
 	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
+	unsigned long flags;
+	int err;
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
+req_retry:
+	req->status = REQ_STATUS_SENT;
+
+	spin_lock_irqsave(&chan->lock, flags);
 	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
 						req->tc->size);
 	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
 						client->msize);
 
-	req->status = REQ_STATUS_SENT;
-
-	if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
-		P9_DPRINTK(P9_DEBUG_TRANS,
-			"9p debug: virtio rpc add_buf returned failure");
-		return -EIO;
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	if (err < 0) {
+		if (err == -ENOSPC) {
+			chan->ring_bufs_avail = 0;
+			spin_unlock_irqrestore(&chan->lock, flags);
+			err = wait_event_interruptible(*chan->vc_wq,
+						chan->ring_bufs_avail);
+			if (err == -ERESTARTSYS)
+				return err;
+
+			P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
+			goto req_retry;
+		} else {
+			spin_unlock_irqrestore(&chan->lock, flags);
+			P9_DPRINTK(P9_DEBUG_TRANS,
+					"9p debug: "
+					"virtio rpc add_buf returned failure");
+			return -EIO;
+		}
 	}
 
 	virtqueue_kick(chan->vq);
+	spin_unlock_irqrestore(&chan->lock, flags);
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
 	return 0;
@@ -290,14 +326,23 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	chan->tag_len = tag_len;
 	err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
 	if (err) {
-		kfree(tag);
-		goto out_free_vq;
+		goto out_free_tag;
 	}
+	chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+	if (!chan->vc_wq) {
+		err = -ENOMEM;
+		goto out_free_tag;
+	}
+	init_waitqueue_head(chan->vc_wq);
+	chan->ring_bufs_avail = 1;
+
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
 	mutex_unlock(&virtio_9p_lock);
 	return 0;
 
+out_free_tag:
+	kfree(tag);
 out_free_vq:
 	vdev->config->del_vqs(vdev);
 	kfree(chan);
@@ -371,6 +416,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	mutex_unlock(&virtio_9p_lock);
 	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
 	kfree(chan->tag);
+	kfree(chan->vc_wq);
 	kfree(chan);
 
 }
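The trans_virtio changes above add flow control for a full virtqueue: when virtqueue_add_buf() returns -ENOSPC the submitter clears ring_bufs_avail and sleeps on vc_wq, and the completion handler sets the flag and wakes it so the request is retried from req_retry. A userspace analogy of that pattern, using a pthread mutex and condition variable in place of chan->lock and the wait queue (toy add_buf()/slots, not the virtio API):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t vc_wq = PTHREAD_COND_INITIALIZER;
static int ring_bufs_avail = 1;
static int slots = 2;                 /* pretend ring with two buffers */

static int add_buf(void)              /* stand-in for virtqueue_add_buf() */
{
    if (slots == 0)
        return -1;                    /* -ENOSPC in the real driver */
    slots--;
    return 0;
}

static void *completion(void *arg)    /* stand-in for req_done() */
{
    for (int i = 0; i < 4; i++) {
        sleep(1);
        pthread_mutex_lock(&lock);
        slots++;                      /* a buffer came back from the device */
        if (!ring_bufs_avail) {
            ring_bufs_avail = 1;
            pthread_cond_signal(&vc_wq);
        }
        pthread_mutex_unlock(&lock);
    }
    return NULL;
}

static void submit(int n)             /* stand-in for p9_virtio_request() */
{
req_retry:
    pthread_mutex_lock(&lock);
    if (add_buf() < 0) {
        ring_bufs_avail = 0;
        while (!ring_bufs_avail)      /* like wait_event_interruptible() */
            pthread_cond_wait(&vc_wq, &lock);
        pthread_mutex_unlock(&lock);
        printf("req %d: retry after ring space freed\n", n);
        goto req_retry;
    }
    pthread_mutex_unlock(&lock);
    printf("req %d: queued\n", n);
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, completion, NULL);
    for (int n = 0; n < 4; n++)
        submit(n);
    pthread_join(t, NULL);
    return 0;
}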
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 95fdd1185067..ff956d1115bc 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -310,9 +310,9 @@ static int clip_constructor(struct neighbour *neigh)
 	return 0;
 }
 
-static u32 clip_hash(const void *pkey, const struct net_device *dev)
+static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd)
 {
-	return jhash_2words(*(u32 *) pkey, dev->ifindex, clip_tbl.hash_rnd);
+	return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd);
 }
 
 static struct neigh_table clip_tbl = {
diff --git a/net/atm/common.c b/net/atm/common.c
index 940404a73b3d..1b9c52a02cd3 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -792,7 +792,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 	default:
 		if (level == SOL_SOCKET)
 			return -EINVAL;
-			break;
+		break;
 	}
 	if (!vcc->dev || !vcc->dev->ops->getsockopt)
 		return -EINVAL;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d98bde1a0ac8..181d70c73d70 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -220,7 +220,6 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
 static int lec_open(struct net_device *dev)
 {
 	netif_start_queue(dev);
-	memset(&dev->stats, 0, sizeof(struct net_device_stats));
 
 	return 0;
 }
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 6262aeae398e..f85da0779e5e 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -38,6 +38,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
 static const struct file_operations proc_atm_dev_ops = {
 	.owner = THIS_MODULE,
 	.read = proc_dev_atm_read,
+	.llseek = noop_llseek,
 };
 
 static void add_stats(struct seq_file *seq, const char *aal,
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index 2a72aa96a568..705e53ef4af0 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -7,7 +7,7 @@ menuconfig HAMRADIO
7 bool "Amateur Radio support" 7 bool "Amateur Radio support"
8 help 8 help
9 If you want to connect your Linux box to an amateur radio, answer Y 9 If you want to connect your Linux box to an amateur radio, answer Y
10 here. You want to read <http://www.tapr.org/tapr/html/pkthome.html> 10 here. You want to read <http://www.tapr.org/>
11 and more specifically about AX.25 on Linux 11 and more specifically about AX.25 on Linux
12 <http://www.linux-ax25.org/>. 12 <http://www.linux-ax25.org/>.
13 13
@@ -42,7 +42,7 @@ config AX25
42 check out the file <file:Documentation/networking/ax25.txt> in the 42 check out the file <file:Documentation/networking/ax25.txt> in the
43 kernel source. More information about digital amateur radio in 43 kernel source. More information about digital amateur radio in
44 general is on the WWW at 44 general is on the WWW at
45 <http://www.tapr.org/tapr/html/pkthome.html>. 45 <http://www.tapr.org/>.
46 46
47 To compile this driver as a module, choose M here: the 47 To compile this driver as a module, choose M here: the
48 module will be called ax25. 48 module will be called ax25.
@@ -89,7 +89,7 @@ config NETROM
89 <http://www.linux-ax25.org>. You also might want to check out the 89 <http://www.linux-ax25.org>. You also might want to check out the
90 file <file:Documentation/networking/ax25.txt>. More information about 90 file <file:Documentation/networking/ax25.txt>. More information about
91 digital amateur radio in general is on the WWW at 91 digital amateur radio in general is on the WWW at
92 <http://www.tapr.org/tapr/html/pkthome.html>. 92 <http://www.tapr.org/>.
93 93
94 To compile this driver as a module, choose M here: the 94 To compile this driver as a module, choose M here: the
95 module will be called netrom. 95 module will be called netrom.
@@ -108,7 +108,7 @@ config ROSE
108 <http://www.linux-ax25.org>. You also might want to check out the 108 <http://www.linux-ax25.org>. You also might want to check out the
109 file <file:Documentation/networking/ax25.txt>. More information about 109 file <file:Documentation/networking/ax25.txt>. More information about
110 digital amateur radio in general is on the WWW at 110 digital amateur radio in general is on the WWW at
111 <http://www.tapr.org/tapr/html/pkthome.html>. 111 <http://www.tapr.org/>.
112 112
113 To compile this driver as a module, choose M here: the 113 To compile this driver as a module, choose M here: the
114 module will be called rose. 114 module will be called rose.
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index cfdfd7e2a172..bb86d2932394 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1103,7 +1103,7 @@ done:
 out:
 	release_sock(sk);
 
-	return 0;
+	return err;
 }
 
 /*
@@ -1392,6 +1392,7 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 	ax25_cb *ax25;
 	int err = 0;
 
+	memset(fsa, 0, sizeof(fsa));
 	lock_sock(sk);
 	ax25 = ax25_sk(sk);
 
@@ -1403,7 +1404,6 @@ static int ax25_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	fsa->fsa_ax25.sax25_family = AF_AX25;
 	fsa->fsa_ax25.sax25_call = ax25->dest_addr;
-	fsa->fsa_ax25.sax25_ndigis = 0;
 
 	if (ax25->digipeat != NULL) {
 		ndigi = ax25->digipeat->ndigi;
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7805945a5fd6..a1690845dc6e 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -412,7 +412,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
 {
 	ax25_uid_assoc *user;
 	ax25_route *ax25_rt;
-	int err;
+	int err = 0;
 
 	if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
 		return -EHOSTUNREACH;
@@ -453,7 +453,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
 put:
 	ax25_put_route(ax25_rt);
 
-	return 0;
+	return err;
 }
 
 struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 421c45bd1b95..c4cf3f595004 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -265,6 +265,115 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 }
 EXPORT_SYMBOL(bt_sock_recvmsg);
 
+static long bt_sock_data_wait(struct sock *sk, long timeo)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!skb_queue_empty(&sk->sk_receive_queue))
+			break;
+
+		if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN))
+			break;
+
+		if (signal_pending(current) || !timeo)
+			break;
+
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	}
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return timeo;
+}
+
+int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
+			   struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+	size_t target, copied = 0;
+	long timeo;
+
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	msg->msg_namelen = 0;
+
+	BT_DBG("sk %p size %zu", sk, size);
+
+	lock_sock(sk);
+
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	do {
+		struct sk_buff *skb;
+		int chunk;
+
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (!skb) {
+			if (copied >= target)
+				break;
+
+			if ((err = sock_error(sk)) != 0)
+				break;
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+
+			err = -EAGAIN;
+			if (!timeo)
+				break;
+
+			timeo = bt_sock_data_wait(sk, timeo);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeo);
+				goto out;
+			}
+			continue;
+		}
+
+		chunk = min_t(unsigned int, skb->len, size);
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (!copied)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size -= chunk;
+
+		sock_recv_ts_and_drops(msg, sk, skb);
+
+		if (!(flags & MSG_PEEK)) {
+			skb_pull(skb, chunk);
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+			kfree_skb(skb);
+
+		} else {
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
+		}
+	} while (size);
+
+out:
+	release_sock(sk);
+	return copied ? : err;
+}
+EXPORT_SYMBOL(bt_sock_stream_recvmsg);
+
 static inline unsigned int bt_accept_poll(struct sock *parent)
 {
 	struct list_head *p, *n;
@@ -297,13 +406,12 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
 		mask |= POLLERR;
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
-	    (sk->sk_shutdown & RCV_SHUTDOWN))
+	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sk->sk_state == BT_CLOSED)
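The new bt_sock_stream_recvmsg() gives SOCK_STREAM Bluetooth sockets proper stream semantics: keep copying until the sock_rcvlowat() target is met, and push a partially consumed skb back to the head of the queue so no bytes are lost between calls. The core of that copy loop, sketched over a toy buffer queue (plain arrays instead of sk_buff queues; names are illustrative, not the kernel's):

#include <stdio.h>
#include <string.h>

struct buf { const char *data; size_t len; };

/* Copy whole or partial "skbs" until `size` is satisfied; a partially
 * consumed buffer is pushed back to the head, like skb_queue_head(). */
static size_t stream_recv(struct buf *q, int *head, int n, char *out, size_t size)
{
    size_t copied = 0;

    while (size && *head < n) {
        struct buf *b = &q[*head];
        size_t chunk = b->len < size ? b->len : size;

        memcpy(out + copied, b->data, chunk);
        copied += chunk;
        size -= chunk;

        if (chunk < b->len) {         /* partial: put the rest back */
            b->data += chunk;
            b->len -= chunk;
        } else {
            (*head)++;                /* fully consumed: drop the buffer */
        }
    }
    return copied;
}

int main(void)
{
    struct buf q[] = { { "hello ", 6 }, { "bluetooth", 9 } };
    char out[8];
    int head = 0;

    size_t got = stream_recv(q, &head, 2, out, sizeof(out));
    printf("%.*s (%zu bytes)\n", (int)got, out, got);
    return 0;
}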
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index d4c6af082d48..ec0a1347f933 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -321,14 +321,10 @@ static int cmtp_session(void *arg)
 int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 {
 	struct cmtp_session *session, *s;
-	bdaddr_t src, dst;
 	int i, err;
 
 	BT_DBG("");
 
-	baswap(&src, &bt_sk(sock->sk)->src);
-	baswap(&dst, &bt_sk(sock->sk)->dst);
-
 	session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL);
 	if (!session)
 		return -ENOMEM;
@@ -347,7 +343,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 
 	BT_DBG("mtu %d", session->mtu);
 
-	sprintf(session->name, "%s", batostr(&dst));
+	sprintf(session->name, "%s", batostr(&bt_sk(sock->sk)->dst));
 
 	session->sock = sock;
 	session->state = BT_CONFIG;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c52f091ee6de..bc2a052e518b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -562,7 +562,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hci_dev_lock_bh(hdev);
 	inquiry_cache_flush(hdev);
 	hci_conn_hash_flush(hdev);
-	hci_blacklist_clear(hdev);
 	hci_dev_unlock_bh(hdev);
 
 	hci_notify(hdev, HCI_DEV_DOWN);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index bfef5bae0b3a..84093b0000b9 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1175,6 +1175,12 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 			hci_send_cmd(hdev,
 					HCI_OP_READ_REMOTE_EXT_FEATURES,
 					sizeof(cp), &cp);
+	} else if (!ev->status && conn->out &&
+			conn->sec_level == BT_SECURITY_HIGH) {
+		struct hci_cp_auth_requested cp;
+		cp.handle = ev->handle;
+		hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
+						sizeof(cp), &cp);
 	} else {
 		conn->state = BT_CONNECTED;
 		hci_proto_connect_cfm(conn, ev->status);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 8fb967beee80..5fce3d6d07b4 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -37,9 +37,7 @@ static ssize_t show_link_type(struct device *dev, struct device_attribute *attr,
 static ssize_t show_link_address(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct hci_conn *conn = dev_get_drvdata(dev);
-	bdaddr_t bdaddr;
-	baswap(&bdaddr, &conn->dst);
-	return sprintf(buf, "%s\n", batostr(&bdaddr));
+	return sprintf(buf, "%s\n", batostr(&conn->dst));
 }
 
 static ssize_t show_link_features(struct device *dev, struct device_attribute *attr, char *buf)
@@ -196,8 +194,8 @@ static inline char *host_typetostr(int type)
 	switch (type) {
 	case HCI_BREDR:
 		return "BR/EDR";
-	case HCI_80211:
-		return "802.11";
+	case HCI_AMP:
+		return "AMP";
 	default:
 		return "UNKNOWN";
 	}
@@ -238,9 +236,7 @@ static ssize_t show_class(struct device *dev, struct device_attribute *attr, cha
 static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct hci_dev *hdev = dev_get_drvdata(dev);
-	bdaddr_t bdaddr;
-	baswap(&bdaddr, &hdev->bdaddr);
-	return sprintf(buf, "%s\n", batostr(&bdaddr));
+	return sprintf(buf, "%s\n", batostr(&hdev->bdaddr));
 }
 
 static ssize_t show_features(struct device *dev, struct device_attribute *attr, char *buf)
@@ -408,10 +404,8 @@ static int inquiry_cache_show(struct seq_file *f, void *p)
 
 	for (e = cache->list; e; e = e->next) {
 		struct inquiry_data *data = &e->data;
-		bdaddr_t bdaddr;
-		baswap(&bdaddr, &data->bdaddr);
 		seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
-			   batostr(&bdaddr),
+			   batostr(&data->bdaddr),
 			   data->pscan_rep_mode, data->pscan_period_mode,
 			   data->pscan_mode, data->dev_class[2],
 			   data->dev_class[1], data->dev_class[0],
@@ -445,13 +439,10 @@ static int blacklist_show(struct seq_file *f, void *p)
 
 	list_for_each(l, &hdev->blacklist) {
 		struct bdaddr_list *b;
-		bdaddr_t bdaddr;
 
 		b = list_entry(l, struct bdaddr_list, list);
 
-		baswap(&bdaddr, &b->bdaddr);
-
-		seq_printf(f, "%s\n", batostr(&bdaddr));
+		seq_printf(f, "%s\n", batostr(&b->bdaddr));
 	}
 
 	hci_dev_unlock_bh(hdev);
diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
index 98fdfa1fbddd..86a91543172a 100644
--- a/net/bluetooth/hidp/Kconfig
+++ b/net/bluetooth/hidp/Kconfig
@@ -1,6 +1,6 @@
 config BT_HIDP
 	tristate "HIDP protocol support"
-	depends on BT && BT_L2CAP && INPUT
+	depends on BT && BT_L2CAP && INPUT && HID_SUPPORT
 	select HID
 	help
 	  HIDP (Human Interface Device Protocol) is a transport layer
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index bfe641b7dfaf..c0ee8b3928ed 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -758,7 +758,6 @@ static int hidp_setup_hid(struct hidp_session *session,
 				struct hidp_connadd_req *req)
 {
 	struct hid_device *hid;
-	bdaddr_t src, dst;
 	int err;
 
 	session->rd_data = kzalloc(req->rd_size, GFP_KERNEL);
@@ -781,9 +780,6 @@ static int hidp_setup_hid(struct hidp_session *session,
 
 	hid->driver_data = session;
 
-	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
-	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
-
 	hid->bus = BUS_BLUETOOTH;
 	hid->vendor = req->vendor;
 	hid->product = req->product;
@@ -791,8 +787,8 @@ static int hidp_setup_hid(struct hidp_session *session,
 	hid->country = req->country;
 
 	strncpy(hid->name, req->name, 128);
-	strncpy(hid->phys, batostr(&src), 64);
-	strncpy(hid->uniq, batostr(&dst), 64);
+	strncpy(hid->phys, batostr(&bt_sk(session->ctrl_sock->sk)->src), 64);
+	strncpy(hid->uniq, batostr(&bt_sk(session->ctrl_sock->sk)->dst), 64);
 
 	hid->dev.parent = hidp_get_device(session);
 	hid->ll_driver = &hidp_hid_driver;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 0b54b7dd8401..cd8f6ea03841 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1008,10 +1008,20 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 		goto done;
 	}
 
-	if (la.l2_psm && __le16_to_cpu(la.l2_psm) < 0x1001 &&
-				!capable(CAP_NET_BIND_SERVICE)) {
-		err = -EACCES;
-		goto done;
+	if (la.l2_psm) {
+		__u16 psm = __le16_to_cpu(la.l2_psm);
+
+		/* PSM must be odd and lsb of upper byte must be 0 */
+		if ((psm & 0x0101) != 0x0001) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		/* Restrict usage of well-known PSMs */
+		if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
+			err = -EACCES;
+			goto done;
+		}
 	}
 
 	write_lock_bh(&l2cap_sk_list.lock);
@@ -1190,6 +1200,13 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
 		goto done;
 	}
 
+	/* PSM must be odd and lsb of upper byte must be 0 */
+	if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
+					sk->sk_type != SOCK_RAW) {
+		err = -EINVAL;
+		goto done;
+	}
+
 	/* Set destination address and psm */
 	bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
 	l2cap_pi(sk)->psm = la.l2_psm;
@@ -1635,7 +1652,7 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in
 
 	*frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!*frag)
-		return -EFAULT;
+		return err;
 	if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count))
 		return -EFAULT;
 
@@ -1661,7 +1678,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr
 	skb = bt_skb_send_alloc(sk, count + hlen,
 			msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(err);
 
 	/* Create L2CAP header */
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1690,7 +1707,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms
 	skb = bt_skb_send_alloc(sk, count + hlen,
 			msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(err);
 
 	/* Create L2CAP header */
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1727,7 +1744,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m
 	skb = bt_skb_send_alloc(sk, count + hlen,
 			msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(err);
 
 	/* Create L2CAP header */
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1934,6 +1951,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
 
 	release_sock(sk);
 
+	if (sock->type == SOCK_STREAM)
+		return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
+
 	return bt_sock_recvmsg(iocb, sock, msg, len, flags);
 }
 
@@ -2401,11 +2421,11 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned
 		break;
 
 	case 2:
-		*val = __le16_to_cpu(*((__le16 *) opt->val));
+		*val = get_unaligned_le16(opt->val);
 		break;
 
 	case 4:
-		*val = __le32_to_cpu(*((__le32 *) opt->val));
+		*val = get_unaligned_le32(opt->val);
 		break;
 
 	default:
@@ -2432,11 +2452,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 		break;
 
 	case 2:
-		*((__le16 *) opt->val) = cpu_to_le16(val);
+		put_unaligned_le16(val, opt->val);
 		break;
 
 	case 4:
-		*((__le32 *) opt->val) = cpu_to_le32(val);
+		put_unaligned_le32(val, opt->val);
 		break;
 
 	default:
@@ -2891,7 +2911,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	struct l2cap_chan_list *list = &conn->chan_list;
 	struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
 	struct l2cap_conn_rsp rsp;
-	struct sock *parent, *uninitialized_var(sk);
+	struct sock *parent, *sk = NULL;
 	int result, status = L2CAP_CS_NO_INFO;
 
 	u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -3000,7 +3020,7 @@ sendresp:
 				L2CAP_INFO_REQ, sizeof(info), &info);
 	}
 
-	if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
+	if (sk && !(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
 				result == L2CAP_CR_SUCCESS) {
 		u8 buf[128];
 		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
@@ -3151,6 +3171,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 
 	if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) {
 		u8 buf[64];
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
 					l2cap_build_conf_req(sk, buf), buf);
 		l2cap_pi(sk)->num_conf_req++;
@@ -4643,6 +4664,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 
 	if (flags & ACL_START) {
 		struct l2cap_hdr *hdr;
+		struct sock *sk;
+		u16 cid;
 		int len;
 
 		if (conn->rx_len) {
@@ -4653,7 +4676,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 			l2cap_conn_unreliable(conn, ECOMM);
 		}
 
-		if (skb->len < 2) {
+		/* Start fragment always begin with Basic L2CAP header */
+		if (skb->len < L2CAP_HDR_SIZE) {
 			BT_ERR("Frame is too short (len %d)", skb->len);
 			l2cap_conn_unreliable(conn, ECOMM);
 			goto drop;
@@ -4661,6 +4685,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 
 		hdr = (struct l2cap_hdr *) skb->data;
 		len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE;
+		cid = __le16_to_cpu(hdr->cid);
 
 		if (len == skb->len) {
 			/* Complete frame received */
@@ -4677,6 +4702,19 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 			goto drop;
 		}
 
+		sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
+
+		if (sk && l2cap_pi(sk)->imtu < len - L2CAP_HDR_SIZE) {
+			BT_ERR("Frame exceeding recv MTU (len %d, MTU %d)",
+					len, l2cap_pi(sk)->imtu);
+			bh_unlock_sock(sk);
+			l2cap_conn_unreliable(conn, ECOMM);
+			goto drop;
+		}
+
+		if (sk)
+			bh_unlock_sock(sk);
+
 		/* Allocate skb for the complete frame (with header) */
 		conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC);
 		if (!conn->rx_skb)
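Both the bind and connect hunks above enforce the same PSM validity rule: per the L2CAP spec, a valid PSM is odd and has the least significant bit of its upper octet clear, which the patch encodes as (psm & 0x0101) == 0x0001. A standalone check with a few sample values:

#include <stdio.h>
#include <stdbool.h>

/* Valid L2CAP PSM: odd, and bit 0 of the upper byte is zero. */
static bool psm_valid(unsigned short psm)
{
    return (psm & 0x0101) == 0x0001;
}

int main(void)
{
    unsigned short tests[] = { 0x0001, 0x0019, 0x0100, 0x1001, 0x1101 };

    for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
        printf("psm 0x%04x -> %s\n", tests[i],
               psm_valid(tests[i]) ? "valid" : "invalid");
    return 0;
}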
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index ad2af5814e40..b826d1bf10df 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -51,8 +51,8 @@ char *batostr(bdaddr_t *ba)
 
 	i ^= 1;
 	sprintf(str[i], "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X",
-		ba->b[0], ba->b[1], ba->b[2],
-		ba->b[3], ba->b[4], ba->b[5]);
+		ba->b[5], ba->b[4], ba->b[3],
+		ba->b[2], ba->b[1], ba->b[0]);
 
 	return str[i];
 }
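The batostr() fix reflects that bdaddr_t stores a Bluetooth address little-endian, with b[0] as the least significant byte, so the human-readable form has to print the array back to front. A minimal illustration (local bdaddr_t typedef for the sketch, not the kernel's definition):

#include <stdio.h>

typedef struct { unsigned char b[6]; } bdaddr_t;

/* Print most significant byte first, as the fixed batostr() does. */
static void print_ba(const bdaddr_t *ba)
{
    printf("%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X\n",
           ba->b[5], ba->b[4], ba->b[3],
           ba->b[2], ba->b[1], ba->b[0]);
}

int main(void)
{
    /* Address 00:11:22:33:44:55, stored LSB-first */
    bdaddr_t ba = { { 0x55, 0x44, 0x33, 0x22, 0x11, 0x00 } };

    print_ba(&ba);
    return 0;
}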
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7dca91bb8c57..fa642aa652bd 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -79,7 +79,10 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr);
 
 static void rfcomm_process_connect(struct rfcomm_session *s);
 
-static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err);
+static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
+							bdaddr_t *dst,
+							u8 sec_level,
+							int *err);
 static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
 static void rfcomm_session_del(struct rfcomm_session *s);
 
@@ -113,11 +116,10 @@ static void rfcomm_session_del(struct rfcomm_session *s);
 #define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1)
 #define __get_rpn_parity(line) (((line) >> 3) & 0x7)
 
-static inline void rfcomm_schedule(uint event)
+static inline void rfcomm_schedule(void)
 {
 	if (!rfcomm_thread)
 		return;
-	//set_bit(event, &rfcomm_event);
 	set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
 	wake_up_process(rfcomm_thread);
 }
@@ -179,13 +181,13 @@ static unsigned char rfcomm_crc_table[256] = {
 /* FCS on 2 bytes */
 static inline u8 __fcs(u8 *data)
 {
-	return (0xff - __crc(data));
+	return 0xff - __crc(data);
 }
 
 /* FCS on 3 bytes */
 static inline u8 __fcs2(u8 *data)
 {
-	return (0xff - rfcomm_crc_table[__crc(data) ^ data[2]]);
+	return 0xff - rfcomm_crc_table[__crc(data) ^ data[2]];
 }
 
 /* Check FCS */
@@ -203,13 +205,13 @@ static inline int __check_fcs(u8 *data, int type, u8 fcs)
 static void rfcomm_l2state_change(struct sock *sk)
 {
 	BT_DBG("%p state %d", sk, sk->sk_state);
-	rfcomm_schedule(RFCOMM_SCHED_STATE);
+	rfcomm_schedule();
 }
 
 static void rfcomm_l2data_ready(struct sock *sk, int bytes)
 {
 	BT_DBG("%p bytes %d", sk, bytes);
-	rfcomm_schedule(RFCOMM_SCHED_RX);
+	rfcomm_schedule();
 }
 
 static int rfcomm_l2sock_create(struct socket **sock)
@@ -255,7 +257,7 @@ static void rfcomm_session_timeout(unsigned long arg)
 	BT_DBG("session %p state %ld", s, s->state);
 
 	set_bit(RFCOMM_TIMED_OUT, &s->flags);
-	rfcomm_schedule(RFCOMM_SCHED_TIMEO);
+	rfcomm_schedule();
 }
 
 static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)
@@ -283,7 +285,7 @@ static void rfcomm_dlc_timeout(unsigned long arg)
 
 	set_bit(RFCOMM_TIMED_OUT, &d->flags);
 	rfcomm_dlc_put(d);
-	rfcomm_schedule(RFCOMM_SCHED_TIMEO);
+	rfcomm_schedule();
 }
 
 static void rfcomm_dlc_set_timer(struct rfcomm_dlc *d, long timeout)
@@ -402,7 +404,7 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
 
 	s = rfcomm_session_get(src, dst);
 	if (!s) {
-		s = rfcomm_session_create(src, dst, &err);
+		s = rfcomm_session_create(src, dst, d->sec_level, &err);
 		if (!s)
 			return err;
 	}
@@ -465,7 +467,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
 	case BT_CONFIG:
 		if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
 			set_bit(RFCOMM_AUTH_REJECT, &d->flags);
-			rfcomm_schedule(RFCOMM_SCHED_AUTH);
+			rfcomm_schedule();
 			break;
 		}
 		/* Fall through */
@@ -485,7 +487,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
 	case BT_CONNECT2:
 		if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
 			set_bit(RFCOMM_AUTH_REJECT, &d->flags);
-			rfcomm_schedule(RFCOMM_SCHED_AUTH);
+			rfcomm_schedule();
 			break;
 		}
 		/* Fall through */
@@ -533,7 +535,7 @@ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
 	skb_queue_tail(&d->tx_queue, skb);
 
 	if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
-		rfcomm_schedule(RFCOMM_SCHED_TX);
+		rfcomm_schedule();
 	return len;
 }
 
@@ -545,7 +547,7 @@ void __rfcomm_dlc_throttle(struct rfcomm_dlc *d)
 		d->v24_sig |= RFCOMM_V24_FC;
 		set_bit(RFCOMM_MSC_PENDING, &d->flags);
 	}
-	rfcomm_schedule(RFCOMM_SCHED_TX);
+	rfcomm_schedule();
 }
 
 void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
@@ -556,7 +558,7 @@ void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
 		d->v24_sig &= ~RFCOMM_V24_FC;
 		set_bit(RFCOMM_MSC_PENDING, &d->flags);
 	}
-	rfcomm_schedule(RFCOMM_SCHED_TX);
+	rfcomm_schedule();
 }
 
 /*
@@ -577,7 +579,7 @@ int rfcomm_dlc_set_modem_status(struct rfcomm_dlc *d, u8 v24_sig)
 	d->v24_sig = v24_sig;
 
 	if (!test_and_set_bit(RFCOMM_MSC_PENDING, &d->flags))
-		rfcomm_schedule(RFCOMM_SCHED_TX);
+		rfcomm_schedule();
 
 	return 0;
 }
@@ -680,7 +682,10 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err)
 	rfcomm_session_put(s);
 }
 
-static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst, int *err)
+static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
+							bdaddr_t *dst,
+							u8 sec_level,
+							int *err)
 {
 	struct rfcomm_session *s = NULL;
 	struct sockaddr_l2 addr;
@@ -705,6 +710,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
 	sk = sock->sk;
 	lock_sock(sk);
 	l2cap_pi(sk)->imtu = l2cap_mtu;
+	l2cap_pi(sk)->sec_level = sec_level;
 	if (l2cap_ertm)
 		l2cap_pi(sk)->mode = L2CAP_MODE_ERTM;
 	release_sock(sk);
@@ -816,7 +822,7 @@ static int rfcomm_queue_disc(struct rfcomm_dlc *d)
 	cmd->fcs = __fcs2((u8 *) cmd);
 
 	skb_queue_tail(&d->tx_queue, skb);
-	rfcomm_schedule(RFCOMM_SCHED_TX);
+	rfcomm_schedule();
 	return 0;
 }
 
@@ -1415,8 +1421,8 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
 		return 0;
 
 	if (len == 1) {
-		/* This is a request, return default settings */
-		bit_rate = RFCOMM_RPN_BR_115200;
+		/* This is a request, return default (according to ETSI TS 07.10) settings */
+		bit_rate = RFCOMM_RPN_BR_9600;
 		data_bits = RFCOMM_RPN_DATA_8;
 		stop_bits = RFCOMM_RPN_STOP_1;
 		parity = RFCOMM_RPN_PARITY_NONE;
@@ -1431,9 +1437,9 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
 
 	if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_BITRATE)) {
 		bit_rate = rpn->bit_rate;
-		if (bit_rate != RFCOMM_RPN_BR_115200) {
+		if (bit_rate > RFCOMM_RPN_BR_230400) {
 			BT_DBG("RPN bit rate mismatch 0x%x", bit_rate);
-			bit_rate = RFCOMM_RPN_BR_115200;
+			bit_rate = RFCOMM_RPN_BR_9600;
 			rpn_mask ^= RFCOMM_RPN_PM_BITRATE;
 		}
 	}
@@ -1698,7 +1704,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
 		break;
 
 	default:
-		BT_ERR("Unknown packet type 0x%02x\n", type);
+		BT_ERR("Unknown packet type 0x%02x", type);
 		break;
 	}
 	kfree_skb(skb);
@@ -1884,7 +1890,7 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s)
 		 * L2CAP MTU minus UIH header and FCS. */
 		s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5;
 
-		rfcomm_schedule(RFCOMM_SCHED_RX);
+		rfcomm_schedule();
 	} else
 		sock_release(nsock);
 }
@@ -2093,7 +2099,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
 
 	rfcomm_session_put(s);
 
-	rfcomm_schedule(RFCOMM_SCHED_AUTH);
+	rfcomm_schedule();
 }
 
 static struct hci_cb rfcomm_cb = {
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 194b3a04cfd3..aec505f934df 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -621,121 +621,29 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	return sent;
 }
 
-static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (!skb_queue_empty(&sk->sk_receive_queue) ||
-		    sk->sk_err ||
-		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
-		    signal_pending(current) ||
-		    !timeo)
-			break;
-
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-		release_sock(sk);
-		timeo = schedule_timeout(timeo);
-		lock_sock(sk);
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-	}
-
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk_sleep(sk), &wait);
-	return timeo;
-}
-
 static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 			       struct msghdr *msg, size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
-	int err = 0;
-	size_t target, copied = 0;
-	long timeo;
+	int len;
 
 	if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
 		rfcomm_dlc_accept(d);
 		return 0;
 	}
 
-	if (flags & MSG_OOB)
-		return -EOPNOTSUPP;
-
-	msg->msg_namelen = 0;
-
-	BT_DBG("sk %p size %zu", sk, size);
+	len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags);
 
 	lock_sock(sk);
+	if (!(flags & MSG_PEEK) && len > 0)
+		atomic_sub(len, &sk->sk_rmem_alloc);
 
-	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
-	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
-	do {
-		struct sk_buff *skb;
-		int chunk;
-
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		if (!skb) {
-			if (copied >= target)
-				break;
-
-			if ((err = sock_error(sk)) != 0)
-				break;
-			if (sk->sk_shutdown & RCV_SHUTDOWN)
-				break;
-
-			err = -EAGAIN;
-			if (!timeo)
-				break;
-
-			timeo = rfcomm_sock_data_wait(sk, timeo);
-
-			if (signal_pending(current)) {
-				err = sock_intr_errno(timeo);
-				goto out;
-			}
-			continue;
-		}
-
-		chunk = min_t(unsigned int, skb->len, size);
-		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			if (!copied)
-				copied = -EFAULT;
-			break;
-		}
-		copied += chunk;
-		size -= chunk;
-
-		sock_recv_ts_and_drops(msg, sk, skb);
-
-		if (!(flags & MSG_PEEK)) {
-			atomic_sub(chunk, &sk->sk_rmem_alloc);
-
-			skb_pull(skb, chunk);
-			if (skb->len) {
-				skb_queue_head(&sk->sk_receive_queue, skb);
-				break;
-			}
-			kfree_skb(skb);
-
-		} else {
-			/* put message back and return */
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			break;
-		}
-	} while (size);
-
-out:
 	if (atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2))
 		rfcomm_dlc_unthrottle(rfcomm_pi(sk)->dlc);
-
 	release_sock(sk);
-	return copied ? : err;
+
+	return len;
 }
 
 static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index befc3a52aa04..a9b81f5dacd1 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -183,9 +183,7 @@ static struct device *rfcomm_get_device(struct rfcomm_dev *dev)
 static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf)
 {
 	struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
-	bdaddr_t bdaddr;
-	baswap(&bdaddr, &dev->dst);
-	return sprintf(buf, "%s\n", batostr(&bdaddr));
+	return sprintf(buf, "%s\n", batostr(&dev->dst));
 }
 
 static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf)
@@ -844,10 +842,6 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, struct file *filp, unsigned
 		BT_DBG("TIOCMIWAIT");
 		break;
 
-	case TIOCGICOUNT:
-		BT_DBG("TIOCGICOUNT");
-		break;
-
 	case TIOCGSERIAL:
 		BT_ERR("TIOCGSERIAL is not supported");
 		return -ENOIOCTLCMD;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index cf09fe591fc2..17cb0b633576 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -212,6 +212,11 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
 	return 0;
 }
 
+static int br_set_flags(struct net_device *netdev, u32 data)
+{
+	return ethtool_op_set_flags(netdev, data, ETH_FLAG_TXVLAN);
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void br_poll_controller(struct net_device *br_dev)
 {
@@ -304,6 +309,7 @@ static const struct ethtool_ops br_ethtool_ops = {
 	.get_ufo = ethtool_op_get_ufo,
 	.set_ufo = ethtool_op_set_ufo,
 	.get_flags = ethtool_op_get_flags,
+	.set_flags = br_set_flags,
 };
 
 static const struct net_device_ops br_netdev_ops = {
@@ -343,5 +349,5 @@ void br_dev_setup(struct net_device *dev)
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
 			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
-			NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
+			NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX;
 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c03d2c3ff03e..89ad25a76202 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -61,30 +61,27 @@ static int port_cost(struct net_device *dev)
 }
 
 
-/*
- * Check for port carrier transistions.
- * Called from work queue to allow for calling functions that
- * might sleep (such as speed check), and to debounce.
- */
+/* Check for port carrier transistions. */
 void br_port_carrier_check(struct net_bridge_port *p)
 {
 	struct net_device *dev = p->dev;
 	struct net_bridge *br = p->br;
 
-	if (netif_carrier_ok(dev))
+	if (netif_running(dev) && netif_carrier_ok(dev))
 		p->path_cost = port_cost(dev);
 
-	if (netif_running(br->dev)) {
-		spin_lock_bh(&br->lock);
-		if (netif_carrier_ok(dev)) {
-			if (p->state == BR_STATE_DISABLED)
-				br_stp_enable_port(p);
-		} else {
-			if (p->state != BR_STATE_DISABLED)
-				br_stp_disable_port(p);
-		}
-		spin_unlock_bh(&br->lock);
-	}
+	if (!netif_running(br->dev))
+		return;
+
+	spin_lock_bh(&br->lock);
+	if (netif_running(dev) && netif_carrier_ok(dev)) {
+		if (p->state == BR_STATE_DISABLED)
+			br_stp_enable_port(p);
+	} else {
+		if (p->state != BR_STATE_DISABLED)
+			br_stp_disable_port(p);
+	}
+	spin_unlock_bh(&br->lock);
 }
 
 static void release_nbp(struct kobject *kobj)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 826cd5221536..25207a1f182b 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -141,7 +141,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
-	if (skb->pkt_type == PACKET_LOOPBACK)
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return skb;
 
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
@@ -159,7 +159,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 		goto drop;
 
 	/* If STP is turned off, then forward */
-	if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
+	if (p->br->stp_enabled == BR_NO_STP)
 		goto forward;
 
 	if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 137f23259a93..865fd7634b67 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -64,22 +64,24 @@ static int brnf_filter_pppoe_tagged __read_mostly = 0;
64 64
65static inline __be16 vlan_proto(const struct sk_buff *skb) 65static inline __be16 vlan_proto(const struct sk_buff *skb)
66{ 66{
67 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 67 if (vlan_tx_tag_present(skb))
68 return skb->protocol;
69 else if (skb->protocol == htons(ETH_P_8021Q))
70 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
71 else
72 return 0;
68} 73}
69 74
70#define IS_VLAN_IP(skb) \ 75#define IS_VLAN_IP(skb) \
71 (skb->protocol == htons(ETH_P_8021Q) && \ 76 (vlan_proto(skb) == htons(ETH_P_IP) && \
72 vlan_proto(skb) == htons(ETH_P_IP) && \
73 brnf_filter_vlan_tagged) 77 brnf_filter_vlan_tagged)
74 78
75#define IS_VLAN_IPV6(skb) \ 79#define IS_VLAN_IPV6(skb) \
76 (skb->protocol == htons(ETH_P_8021Q) && \ 80 (vlan_proto(skb) == htons(ETH_P_IPV6) && \
77 vlan_proto(skb) == htons(ETH_P_IPV6) &&\
78 brnf_filter_vlan_tagged) 81 brnf_filter_vlan_tagged)
79 82
80#define IS_VLAN_ARP(skb) \ 83#define IS_VLAN_ARP(skb) \
81 (skb->protocol == htons(ETH_P_8021Q) && \ 84 (vlan_proto(skb) == htons(ETH_P_ARP) && \
82 vlan_proto(skb) == htons(ETH_P_ARP) && \
83 brnf_filter_vlan_tagged) 85 brnf_filter_vlan_tagged)
84 86
85static inline __be16 pppoe_proto(const struct sk_buff *skb) 87static inline __be16 pppoe_proto(const struct sk_buff *skb)
@@ -106,7 +108,6 @@ static struct dst_ops fake_dst_ops = {
106 .family = AF_INET, 108 .family = AF_INET,
107 .protocol = cpu_to_be16(ETH_P_IP), 109 .protocol = cpu_to_be16(ETH_P_IP),
108 .update_pmtu = fake_update_pmtu, 110 .update_pmtu = fake_update_pmtu,
109 .entries = ATOMIC_INIT(0),
110}; 111};
111 112
112/* 113/*
@@ -209,6 +210,72 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb)
209 skb->protocol = htons(ETH_P_PPP_SES); 210 skb->protocol = htons(ETH_P_PPP_SES);
210} 211}
211 212
213/* When handing a packet over to the IP layer
214 * check whether we have a skb that is in the
215 * expected format
216 */
217
218static int br_parse_ip_options(struct sk_buff *skb)
219{
220 struct ip_options *opt;
221 struct iphdr *iph;
222 struct net_device *dev = skb->dev;
223 u32 len;
224
225 iph = ip_hdr(skb);
226 opt = &(IPCB(skb)->opt);
227
228 /* Basic sanity checks */
229 if (iph->ihl < 5 || iph->version != 4)
230 goto inhdr_error;
231
232 if (!pskb_may_pull(skb, iph->ihl*4))
233 goto inhdr_error;
234
235 iph = ip_hdr(skb);
236 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
237 goto inhdr_error;
238
239 len = ntohs(iph->tot_len);
240 if (skb->len < len) {
241 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
242 goto drop;
243 } else if (len < (iph->ihl*4))
244 goto inhdr_error;
245
246 if (pskb_trim_rcsum(skb, len)) {
247 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
248 goto drop;
249 }
250
251 /* Zero out the CB buffer if no options present */
252 if (iph->ihl == 5) {
253 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
254 return 0;
255 }
256
257 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
258 if (ip_options_compile(dev_net(dev), opt, skb))
259 goto inhdr_error;
260
261 /* Check correct handling of SRR option */
262 if (unlikely(opt->srr)) {
263 struct in_device *in_dev = __in_dev_get_rcu(dev);
264 if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
265 goto drop;
266
267 if (ip_options_rcv_srr(skb))
268 goto drop;
269 }
270
271 return 0;
272
273inhdr_error:
274 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
275drop:
276 return -1;
277}
278
212/* Fill in the header for fragmented IP packets handled by 279/* Fill in the header for fragmented IP packets handled by
213 * the IPv4 connection tracking code. 280 * the IPv4 connection tracking code.
214 */ 281 */
@@ -549,7 +616,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
549{ 616{
550 struct net_bridge_port *p; 617 struct net_bridge_port *p;
551 struct net_bridge *br; 618 struct net_bridge *br;
552 struct iphdr *iph;
553 __u32 len = nf_bridge_encap_header_len(skb); 619 __u32 len = nf_bridge_encap_header_len(skb);
554 620
555 if (unlikely(!pskb_may_pull(skb, len))) 621 if (unlikely(!pskb_may_pull(skb, len)))
@@ -578,28 +644,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
578 644
579 nf_bridge_pull_encap_header_rcsum(skb); 645 nf_bridge_pull_encap_header_rcsum(skb);
580 646
581	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
582		goto inhdr_error;
583
647	if (br_parse_ip_options(skb))
648		/* Drop invalid packet */
649		goto out;
584 iph = ip_hdr(skb);
585 if (iph->ihl < 5 || iph->version != 4)
586 goto inhdr_error;
587
588 if (!pskb_may_pull(skb, 4 * iph->ihl))
589 goto inhdr_error;
590
591 iph = ip_hdr(skb);
592 if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
593 goto inhdr_error;
594
595 len = ntohs(iph->tot_len);
596 if (skb->len < len || len < 4 * iph->ihl)
597 goto inhdr_error;
598
599 pskb_trim_rcsum(skb, len);
600
601 /* BUG: Should really parse the IP options here. */
602 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
603 650
604 nf_bridge_put(skb->nf_bridge); 651 nf_bridge_put(skb->nf_bridge);
605 if (!nf_bridge_alloc(skb)) 652 if (!nf_bridge_alloc(skb))
@@ -614,8 +661,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
614 661
615 return NF_STOLEN; 662 return NF_STOLEN;
616 663
617inhdr_error:
618// IP_INC_STATS_BH(IpInHdrErrors);
619out: 664out:
620 return NF_DROP; 665 return NF_DROP;
621} 666}
@@ -759,14 +804,19 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
759#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) 804#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
760static int br_nf_dev_queue_xmit(struct sk_buff *skb) 805static int br_nf_dev_queue_xmit(struct sk_buff *skb)
761{ 806{
807 int ret;
808
762 if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && 809 if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
763 skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && 810 skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
764 !skb_is_gso(skb)) { 811 !skb_is_gso(skb)) {
765		/* BUG: Should really parse the IP options here. */
766		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
767		return ip_fragment(skb, br_dev_queue_push_xmit);
812		if (br_parse_ip_options(skb))
813			/* Drop invalid packet */
814			return NF_DROP;
815		ret = ip_fragment(skb, br_dev_queue_push_xmit);
768	} else 816	} else
769		return br_dev_queue_push_xmit(skb);
817		ret = br_dev_queue_push_xmit(skb);
818
819	return ret;
770} 820}
771#else 821#else
772static int br_nf_dev_queue_xmit(struct sk_buff *skb) 822static int br_nf_dev_queue_xmit(struct sk_buff *skb)
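The rewrite above also converts the early returns into a single exit via ret, so both the fragmentation path and the direct path leave the function the same way. A toy model of the resulting control flow, with every name invented for illustration:

    #include <stdbool.h>

    struct pkt { unsigned int len, mtu, overhead; bool tracked_ipv4, gso; };

    static int parse_options(struct pkt *p)     { return 0; }  /* stub */
    static int fragment_and_send(struct pkt *p) { return 1; }  /* stub */
    static int send_direct(struct pkt *p)       { return 1; }  /* stub */

    static int queue_xmit(struct pkt *p)
    {
            int ret;

            if (p->tracked_ipv4 && p->len + p->overhead > p->mtu && !p->gso) {
                    if (parse_options(p))
                            return -1;          /* drop invalid packet */
                    ret = fragment_and_send(p); /* validated, safe to refragment */
            } else {
                    ret = send_direct(p);
            }
            return ret;                         /* single exit, as in the patch */
    }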
@@ -954,15 +1004,22 @@ int __init br_netfilter_init(void)
954{ 1004{
955 int ret; 1005 int ret;
956 1006
957	ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1007	ret = dst_entries_init(&fake_dst_ops);
958 if (ret < 0) 1008 if (ret < 0)
959 return ret; 1009 return ret;
1010
1011 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1012 if (ret < 0) {
1013 dst_entries_destroy(&fake_dst_ops);
1014 return ret;
1015 }
960#ifdef CONFIG_SYSCTL 1016#ifdef CONFIG_SYSCTL
961 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table); 1017 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
962 if (brnf_sysctl_header == NULL) { 1018 if (brnf_sysctl_header == NULL) {
963 printk(KERN_WARNING 1019 printk(KERN_WARNING
964 "br_netfilter: can't register to sysctl.\n"); 1020 "br_netfilter: can't register to sysctl.\n");
965 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1021 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1022 dst_entries_destroy(&fake_dst_ops);
966 return -ENOMEM; 1023 return -ENOMEM;
967 } 1024 }
968#endif 1025#endif
@@ -976,4 +1033,5 @@ void br_netfilter_fini(void)
976#ifdef CONFIG_SYSCTL 1033#ifdef CONFIG_SYSCTL
977 unregister_sysctl_table(brnf_sysctl_header); 1034 unregister_sysctl_table(brnf_sysctl_header);
978#endif 1035#endif
1036 dst_entries_destroy(&fake_dst_ops);
979} 1037}
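Note the ordering in br_netfilter_init(): fake_dst_ops is initialized before the hooks are registered, and every failure path unwinds whatever was already set up, in reverse order (br_netfilter_fini() mirrors that). The same goto-unwind idiom in isolation, with placeholder step names:

    /* Placeholder init/exit pairs; only the unwinding pattern matters. */
    static int  step_a_init(void) { return 0; }
    static void step_a_exit(void) { }
    static int  step_b_init(void) { return 0; }
    static void step_b_exit(void) { }
    static int  step_c_init(void) { return 0; }

    static int subsys_init(void)
    {
            int err;

            err = step_a_init();            /* e.g. dst_entries_init() */
            if (err)
                    return err;
            err = step_b_init();            /* e.g. nf_register_hooks() */
            if (err)
                    goto undo_a;
            err = step_c_init();            /* e.g. sysctl registration */
            if (err)
                    goto undo_b;
            return 0;

    undo_b:
            step_b_exit();
    undo_a:
            step_a_exit();
            return err;
    }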
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 87b53b3a921d..eae67bf0446c 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -39,8 +39,6 @@ static bool
39ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) 39ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
40{ 40{
41 const struct ebt_vlan_info *info = par->matchinfo; 41 const struct ebt_vlan_info *info = par->matchinfo;
42 const struct vlan_hdr *fp;
43 struct vlan_hdr _frame;
44 42
45 unsigned short TCI; /* Whole TCI, given from parsed frame */ 43 unsigned short TCI; /* Whole TCI, given from parsed frame */
46 unsigned short id; /* VLAN ID, given from frame TCI */ 44 unsigned short id; /* VLAN ID, given from frame TCI */
@@ -48,9 +46,20 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
48 /* VLAN encapsulated Type/Length field, given from orig frame */ 46 /* VLAN encapsulated Type/Length field, given from orig frame */
49 __be16 encap; 47 __be16 encap;
50 48
51	fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
52	if (fp == NULL)
53		return false;
49	if (vlan_tx_tag_present(skb)) {
50		TCI = vlan_tx_tag_get(skb);
51		encap = skb->protocol;
52 } else {
53 const struct vlan_hdr *fp;
54 struct vlan_hdr _frame;
55
56 fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
57 if (fp == NULL)
58 return false;
59
60 TCI = ntohs(fp->h_vlan_TCI);
61 encap = fp->h_vlan_encapsulated_proto;
62 }
54 63
55 /* Tag Control Information (TCI) consists of the following elements: 64 /* Tag Control Information (TCI) consists of the following elements:
56 * - User_priority. The user_priority field is three bits in length, 65 * - User_priority. The user_priority field is three bits in length,
@@ -59,10 +68,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
59 * (CFI) is a single bit flag value. Currently ignored. 68 * (CFI) is a single bit flag value. Currently ignored.
60 * - VLAN Identifier (VID). The VID is encoded as 69 * - VLAN Identifier (VID). The VID is encoded as
61 * an unsigned binary number. */ 70 * an unsigned binary number. */
62 TCI = ntohs(fp->h_vlan_TCI);
63 id = TCI & VLAN_VID_MASK; 71 id = TCI & VLAN_VID_MASK;
64 prio = (TCI >> 13) & 0x7; 72 prio = (TCI >> 13) & 0x7;
65 encap = fp->h_vlan_encapsulated_proto;
66 73
67 /* Checking VLAN Identifier (VID) */ 74 /* Checking VLAN Identifier (VID) */
68 if (GET_BITMASK(EBT_VLAN_ID)) 75 if (GET_BITMASK(EBT_VLAN_ID))
@@ -111,10 +118,10 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
111 * 0 - The null VLAN ID. 118 * 0 - The null VLAN ID.
112 * 1 - The default Port VID (PVID) 119 * 1 - The default Port VID (PVID)
113 * 0x0FFF - Reserved for implementation use. 120 * 0x0FFF - Reserved for implementation use.
114	 * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */
121	 * if_vlan.h: VLAN_N_VID 4096. */
115 if (GET_BITMASK(EBT_VLAN_ID)) { 122 if (GET_BITMASK(EBT_VLAN_ID)) {
116 if (!!info->id) { /* if id!=0 => check vid range */ 123 if (!!info->id) { /* if id!=0 => check vid range */
117			if (info->id > VLAN_GROUP_ARRAY_LEN) {
124			if (info->id > VLAN_N_VID) {
118 pr_debug("id %d is out of range (1-4096)\n", 125 pr_debug("id %d is out of range (1-4096)\n",
119 info->id); 126 info->id);
120 return -EINVAL; 127 return -EINVAL;
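For reference, the TCI split described in the match code above (3 bits of user_priority, 1 CFI bit, 12 bits of VID) can be exercised standalone; this sketch uses the same mask and shift as the match:

    #include <stdio.h>

    #define VLAN_VID_MASK 0x0fff    /* low 12 bits: VLAN Identifier */

    int main(void)
    {
            unsigned int tci = 0xa123;              /* example TCI, host order */
            unsigned int id = tci & VLAN_VID_MASK;  /* VID */
            unsigned int prio = (tci >> 13) & 0x7;  /* user_priority */
            unsigned int cfi = (tci >> 12) & 0x1;   /* CFI, ignored by the match */

            printf("vid=%u prio=%u cfi=%u\n", id, prio, cfi); /* vid=291 prio=5 cfi=0 */
            return 0;
    }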
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bcc102e3be4d..a1dcf83f0d58 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -124,16 +124,23 @@ ebt_dev_check(const char *entry, const struct net_device *device)
124#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) 124#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg))
125/* process standard matches */ 125/* process standard matches */
126static inline int 126static inline int
127ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
127ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
128 const struct net_device *in, const struct net_device *out) 128 const struct net_device *in, const struct net_device *out)
129{ 129{
130 const struct ethhdr *h = eth_hdr(skb);
131 __be16 ethproto;
130 int verdict, i; 132 int verdict, i;
131 133
134 if (vlan_tx_tag_present(skb))
135 ethproto = htons(ETH_P_8021Q);
136 else
137 ethproto = h->h_proto;
138
132 if (e->bitmask & EBT_802_3) { 139 if (e->bitmask & EBT_802_3) {
133		if (FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO))
140		if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO))
134 return 1; 141 return 1;
135 } else if (!(e->bitmask & EBT_NOPROTO) && 142 } else if (!(e->bitmask & EBT_NOPROTO) &&
136		   FWINV2(e->ethproto != h->h_proto, EBT_IPROTO))
143		   FWINV2(e->ethproto != ethproto, EBT_IPROTO))
137 return 1; 144 return 1;
138 145
139 if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN)) 146 if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
@@ -213,7 +220,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
213 base = private->entries; 220 base = private->entries;
214 i = 0; 221 i = 0;
215 while (i < nentries) { 222 while (i < nentries) {
216		if (ebt_basic_match(point, eth_hdr(skb), in, out))
223		if (ebt_basic_match(point, skb, in, out))
217 goto letscontinue; 224 goto letscontinue;
218 225
219 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) 226 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
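The FWINV2 macro used above implements optional match inversion with an XOR: the raw mismatch result is flipped when the corresponding invert flag is set. A self-contained demonstration of the idiom (macro and flag names invented; the kernel version reads e->invflags):

    #include <stdio.h>

    #define INV_PROTO 0x01
    #define FWINV(cond, flg) ((cond) ^ !!(invflags & (flg)))

    int main(void)
    {
            unsigned int invflags = INV_PROTO;  /* user asked for an inverted match */
            int proto_differs = 0;              /* protocols are equal */

            /* Without inversion this is no mismatch (0); with INV_PROTO set,
             * equality now counts as a mismatch (1), so the rule is skipped. */
            printf("mismatch=%d\n", FWINV(proto_differs, INV_PROTO)); /* prints 1 */
            return 0;
    }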
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 76ae68303d3a..d522d8c1703e 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -16,11 +16,18 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
16{ 16{
17 struct dev_info *dev_info; 17 struct dev_info *dev_info;
18 enum cfcnfg_phy_preference pref; 18 enum cfcnfg_phy_preference pref;
19 int res;
20
19 memset(l, 0, sizeof(*l)); 21 memset(l, 0, sizeof(*l));
20	l->priority = s->priority;
22	/* In the CAIF protocol a low value means high priority */
23	l->priority = CAIF_PRIO_MAX - s->priority + 1;
21 24
22	if (s->link_name[0] != '\0')
23		l->phyid = cfcnfg_get_named(cnfg, s->link_name);
25	if (s->ifindex != 0) {
26		res = cfcnfg_get_id_from_ifi(cnfg, s->ifindex);
27		if (res < 0)
28			return res;
29		l->phyid = res;
30	}
24 else { 31 else {
25 switch (s->link_selector) { 32 switch (s->link_selector) {
26 case CAIF_LINK_HIGH_BANDW: 33 case CAIF_LINK_HIGH_BANDW:
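The inversion l->priority = CAIF_PRIO_MAX - s->priority + 1 maps socket semantics (higher value = more important) onto CAIF link semantics (lower value = more important). A worked example, assuming the CAIF_PRIO_* values from linux/caif/caif_socket.h (0x01 and 0x1F; verify against your tree):

    #include <stdio.h>

    #define CAIF_PRIO_MIN 0x01  /* assumed enum value, see caif_socket.h */
    #define CAIF_PRIO_MAX 0x1F  /* assumed enum value, see caif_socket.h */

    int main(void)
    {
            /* Highest socket priority becomes the lowest (best) link value. */
            for (int s = CAIF_PRIO_MIN; s <= CAIF_PRIO_MAX; s += 14)
                    printf("sk_priority=%2d -> link priority=%2d\n",
                           s, CAIF_PRIO_MAX - s + 1);
            /* sk_priority= 1 -> 31, 15 -> 17, 29 -> 3 */
            return 0;
    }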
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 0b586e9d1378..a42a408306e4 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -9,6 +9,8 @@
9 * and Sakari Ailus <sakari.ailus@nokia.com> 9 * and Sakari Ailus <sakari.ailus@nokia.com>
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
13
12#include <linux/version.h> 14#include <linux/version.h>
13#include <linux/module.h> 15#include <linux/module.h>
14#include <linux/kernel.h> 16#include <linux/kernel.h>
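The pr_fmt definition added here is what lets the CAIF files shorten their pr_warn()/pr_err() calls throughout this series: the macro rewrites the format string and also injects __func__ as the first argument. A userspace mimic of the expansion (relies on the GNU ##__VA_ARGS__ extension):

    #include <stdio.h>

    #define pr_fmt(fmt) "caif_dev" ":%s(): " fmt, __func__
    #define pr_warn(fmt, ...) fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

    static void demo(void)
    {
            pr_warn("can't create cfcnfg\n"); /* -> "caif_dev:demo(): can't create cfcnfg" */
    }

    int main(void) { demo(); return 0; }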
@@ -171,7 +173,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
171 net = dev_net(dev); 173 net = dev_net(dev);
172 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb); 174 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
173 caifd = caif_get(dev); 175 caifd = caif_get(dev);
174	if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
176	if (!caifd || !caifd->layer.up || !caifd->layer.up->receive)
175 return NET_RX_DROP; 177 return NET_RX_DROP;
176 178
177 if (caifd->layer.up->receive(caifd->layer.up, pkt)) 179 if (caifd->layer.up->receive(caifd->layer.up, pkt))
@@ -214,7 +216,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
214 216
215 switch (what) { 217 switch (what) {
216 case NETDEV_REGISTER: 218 case NETDEV_REGISTER:
217		pr_info("CAIF: %s():register %s\n", __func__, dev->name);
219		netdev_info(dev, "register\n");
218 caifd = caif_device_alloc(dev); 220 caifd = caif_device_alloc(dev);
219 if (caifd == NULL) 221 if (caifd == NULL)
220 break; 222 break;
@@ -225,14 +227,13 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
225 break; 227 break;
226 228
227 case NETDEV_UP: 229 case NETDEV_UP:
228		pr_info("CAIF: %s(): up %s\n", __func__, dev->name);
230		netdev_info(dev, "up\n");
229 caifd = caif_get(dev); 231 caifd = caif_get(dev);
230 if (caifd == NULL) 232 if (caifd == NULL)
231 break; 233 break;
232 caifdev = netdev_priv(dev); 234 caifdev = netdev_priv(dev);
233 if (atomic_read(&caifd->state) == NETDEV_UP) { 235 if (atomic_read(&caifd->state) == NETDEV_UP) {
234			pr_info("CAIF: %s():%s already up\n",
235				__func__, dev->name);
236			netdev_info(dev, "already up\n");
236 break; 237 break;
237 } 238 }
238 atomic_set(&caifd->state, what); 239 atomic_set(&caifd->state, what);
@@ -273,7 +274,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
273 caifd = caif_get(dev); 274 caifd = caif_get(dev);
274 if (caifd == NULL) 275 if (caifd == NULL)
275 break; 276 break;
276		pr_info("CAIF: %s():going down %s\n", __func__, dev->name);
277		netdev_info(dev, "going down\n");
277 278
278 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN || 279 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
279 atomic_read(&caifd->state) == NETDEV_DOWN) 280 atomic_read(&caifd->state) == NETDEV_DOWN)
@@ -295,11 +296,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
295 caifd = caif_get(dev); 296 caifd = caif_get(dev);
296 if (caifd == NULL) 297 if (caifd == NULL)
297 break; 298 break;
298		pr_info("CAIF: %s(): down %s\n", __func__, dev->name);
299		netdev_info(dev, "down\n");
299		if (atomic_read(&caifd->in_use)) 300		if (atomic_read(&caifd->in_use))
300			pr_warning("CAIF: %s(): "
301				   "Unregistering an active CAIF device: %s\n",
302				   __func__, dev->name);
301			netdev_warn(dev,
302				    "Unregistering an active CAIF device\n");
303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); 303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
304 dev_put(dev); 304 dev_put(dev);
305 atomic_set(&caifd->state, what); 305 atomic_set(&caifd->state, what);
@@ -307,7 +307,9 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
307 307
308 case NETDEV_UNREGISTER: 308 case NETDEV_UNREGISTER:
309 caifd = caif_get(dev); 309 caifd = caif_get(dev);
310		pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name);
310		if (caifd == NULL)
311 break;
312 netdev_info(dev, "unregister\n");
311 atomic_set(&caifd->state, what); 313 atomic_set(&caifd->state, what);
312 caif_device_destroy(dev); 314 caif_device_destroy(dev);
313 break; 315 break;
@@ -391,7 +393,7 @@ static int __init caif_device_init(void)
391 int result; 393 int result;
392 cfg = cfcnfg_create(); 394 cfg = cfcnfg_create();
393 if (!cfg) { 395 if (!cfg) {
394		pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__);
396		pr_warn("can't create cfcnfg\n");
395 goto err_cfcnfg_create_failed; 397 goto err_cfcnfg_create_failed;
396 } 398 }
397 result = register_pernet_device(&caif_net_ops); 399 result = register_pernet_device(&caif_net_ops);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 4bf28f25f368..1bf0cf503796 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/fs.h> 9#include <linux/fs.h>
8#include <linux/init.h> 10#include <linux/init.h>
9#include <linux/module.h> 11#include <linux/module.h>
@@ -15,7 +17,6 @@
15#include <linux/poll.h> 17#include <linux/poll.h>
16#include <linux/tcp.h> 18#include <linux/tcp.h>
17#include <linux/uaccess.h> 19#include <linux/uaccess.h>
18#include <linux/mutex.h>
19#include <linux/debugfs.h> 20#include <linux/debugfs.h>
20#include <linux/caif/caif_socket.h> 21#include <linux/caif/caif_socket.h>
21#include <asm/atomic.h> 22#include <asm/atomic.h>
@@ -28,9 +29,6 @@
28MODULE_LICENSE("GPL"); 29MODULE_LICENSE("GPL");
29MODULE_ALIAS_NETPROTO(AF_CAIF); 30MODULE_ALIAS_NETPROTO(AF_CAIF);
30 31
31#define CAIF_DEF_SNDBUF (4096*10)
32#define CAIF_DEF_RCVBUF (4096*100)
33
34/* 32/*
35 * CAIF state is re-using the TCP socket states. 33 * CAIF state is re-using the TCP socket states.
36 * caif_states stored in sk_state reflect the state as reported by 34 * caif_states stored in sk_state reflect the state as reported by
@@ -157,9 +155,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
157 155
158 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 156 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
159 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { 157 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
160		trace_printk("CAIF: %s():"
161			     " sending flow OFF (queue len = %d %d)\n",
162			     __func__,
158		pr_debug("sending flow OFF (queue len = %d %d)\n",
163 atomic_read(&cf_sk->sk.sk_rmem_alloc), 159 atomic_read(&cf_sk->sk.sk_rmem_alloc),
164 sk_rcvbuf_lowwater(cf_sk)); 160 sk_rcvbuf_lowwater(cf_sk));
165 set_rx_flow_off(cf_sk); 161 set_rx_flow_off(cf_sk);
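The flow-off test above stops the sender once queued receive memory plus the incoming packet would reach the receive buffer limit. A standalone sketch of that watermark check (field names simplified):

    #include <stdbool.h>
    #include <stdio.h>

    struct rx_state {
            unsigned int rmem_alloc;  /* bytes currently queued */
            unsigned int rcvbuf;      /* receive buffer limit */
            bool flow_on;
    };

    static void rx_enqueue(struct rx_state *rx, unsigned int truesize)
    {
            if (rx->rmem_alloc + truesize >= rx->rcvbuf && rx->flow_on) {
                    rx->flow_on = false;  /* tell the peer to stop sending */
                    printf("sending flow OFF (queue len = %u)\n", rx->rmem_alloc);
            }
            rx->rmem_alloc += truesize;
    }

    int main(void)
    {
            struct rx_state rx = { .rmem_alloc = 3900, .rcvbuf = 4096, .flow_on = true };
            rx_enqueue(&rx, 256);   /* 3900 + 256 >= 4096 -> flow OFF */
            return 0;
    }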
@@ -172,9 +168,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
172 return err; 168 return err;
173 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { 169 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
174 set_rx_flow_off(cf_sk); 170 set_rx_flow_off(cf_sk);
175		trace_printk("CAIF: %s():"
176			     " sending flow OFF due to rmem_schedule\n",
177			     __func__);
171		pr_debug("sending flow OFF due to rmem_schedule\n");
178 dbfs_atomic_inc(&cnt.num_rx_flow_off); 172 dbfs_atomic_inc(&cnt.num_rx_flow_off);
179 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); 173 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
180 } 174 }
@@ -275,8 +269,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
275 break; 269 break;
276 270
277 default: 271 default:
278		pr_debug("CAIF: %s(): Unexpected flow command %d\n",
279			 __func__, flow);
272		pr_debug("Unexpected flow command %d\n", flow);
280 } 273 }
281} 274}
282 275
@@ -536,8 +529,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
536 529
537 /* Slight paranoia, probably not needed. */ 530 /* Slight paranoia, probably not needed. */
538 if (unlikely(loopcnt++ > 1000)) { 531 if (unlikely(loopcnt++ > 1000)) {
539			pr_warning("CAIF: %s(): transmit retries failed,"
540				   " error = %d\n", __func__, ret);
532			pr_warn("transmit retries failed, error = %d\n", ret);
541 break; 533 break;
542 } 534 }
543 535
@@ -724,8 +716,7 @@ static int setsockopt(struct socket *sock,
724{ 716{
725 struct sock *sk = sock->sk; 717 struct sock *sk = sock->sk;
726 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 718 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
727	int prio, linksel;
719	int linksel;
728 struct ifreq ifreq;
729 720
730 if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED) 721 if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
731 return -ENOPROTOOPT; 722 return -ENOPROTOOPT;
@@ -743,33 +734,6 @@ static int setsockopt(struct socket *sock,
743 release_sock(&cf_sk->sk); 734 release_sock(&cf_sk->sk);
744 return 0; 735 return 0;
745 736
746 case SO_PRIORITY:
747 if (lvl != SOL_SOCKET)
748 goto bad_sol;
749 if (ol < sizeof(int))
750 return -EINVAL;
751 if (copy_from_user(&prio, ov, sizeof(int)))
752 return -EINVAL;
753 lock_sock(&(cf_sk->sk));
754 cf_sk->conn_req.priority = prio;
755 release_sock(&cf_sk->sk);
756 return 0;
757
758 case SO_BINDTODEVICE:
759 if (lvl != SOL_SOCKET)
760 goto bad_sol;
761 if (ol < sizeof(struct ifreq))
762 return -EINVAL;
763 if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
764 return -EFAULT;
765 lock_sock(&(cf_sk->sk));
766 strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
767 sizeof(cf_sk->conn_req.link_name));
768 cf_sk->conn_req.link_name
769 [sizeof(cf_sk->conn_req.link_name)-1] = 0;
770 release_sock(&cf_sk->sk);
771 return 0;
772
773 case CAIFSO_REQ_PARAM: 737 case CAIFSO_REQ_PARAM:
774 if (lvl != SOL_CAIF) 738 if (lvl != SOL_CAIF)
775 goto bad_sol; 739 goto bad_sol;
@@ -888,6 +852,18 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
888 sock->state = SS_CONNECTING; 852 sock->state = SS_CONNECTING;
889 sk->sk_state = CAIF_CONNECTING; 853 sk->sk_state = CAIF_CONNECTING;
890 854
855	/* Check priority value coming from socket */
856	/* if priority value is out of range it will be adjusted */
857 if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
858 cf_sk->conn_req.priority = CAIF_PRIO_MAX;
859 else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN)
860 cf_sk->conn_req.priority = CAIF_PRIO_MIN;
861 else
862 cf_sk->conn_req.priority = cf_sk->sk.sk_priority;
863
864	/* ifindex = id of the interface. */
865 cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if;
866
891 dbfs_atomic_inc(&cnt.num_connect_req); 867 dbfs_atomic_inc(&cnt.num_connect_req);
892 cf_sk->layer.receive = caif_sktrecv_cb; 868 cf_sk->layer.receive = caif_sktrecv_cb;
893 err = caif_connect_client(&cf_sk->conn_req, 869 err = caif_connect_client(&cf_sk->conn_req,
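The clamp added above keeps an arbitrary sk_priority inside the CAIF range before it is copied into the connect request. In isolation, again assuming the CAIF_PRIO_* values from linux/caif/caif_socket.h:

    #define CAIF_PRIO_MIN 0x01  /* assumed enum value, see caif_socket.h */
    #define CAIF_PRIO_MAX 0x1F  /* assumed enum value, see caif_socket.h */

    static unsigned int caif_clamp_prio(unsigned int sk_priority)
    {
            if (sk_priority > CAIF_PRIO_MAX)
                    return CAIF_PRIO_MAX;
            if (sk_priority < CAIF_PRIO_MIN)
                    return CAIF_PRIO_MIN;
            return sk_priority;
    }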
@@ -912,8 +888,8 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
912 cf_sk->tailroom = tailroom; 888 cf_sk->tailroom = tailroom;
913 cf_sk->maxframe = mtu - (headroom + tailroom); 889 cf_sk->maxframe = mtu - (headroom + tailroom);
914 if (cf_sk->maxframe < 1) { 890 if (cf_sk->maxframe < 1) {
915		pr_warning("CAIF: %s(): CAIF Interface MTU too small (%u)\n",
916			   __func__, mtu);
891		pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
892		err = -ENODEV;
917 goto out; 893 goto out;
918 } 894 }
919 895
@@ -1132,10 +1108,6 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
1132 /* Store the protocol */ 1108 /* Store the protocol */
1133 sk->sk_protocol = (unsigned char) protocol; 1109 sk->sk_protocol = (unsigned char) protocol;
1134 1110
1135 /* Sendbuf dictates the amount of outbound packets not yet sent */
1136 sk->sk_sndbuf = CAIF_DEF_SNDBUF;
1137 sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
1138
1139 /* 1111 /*
1140 * Lock in order to try to stop someone from opening the socket 1112 * Lock in order to try to stop someone from opening the socket
1141 * too early. 1113 * too early.
@@ -1155,7 +1127,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
1155 set_rx_flow_on(cf_sk); 1127 set_rx_flow_on(cf_sk);
1156 1128
1157 /* Set default options on configuration */ 1129 /* Set default options on configuration */
1158	cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
1130	cf_sk->sk.sk_priority = CAIF_PRIO_NORMAL;
1159 cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY; 1131 cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
1160 cf_sk->conn_req.protocol = protocol; 1132 cf_sk->conn_req.protocol = protocol;
1161 /* Increase the number of sockets created. */ 1133 /* Increase the number of sockets created. */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 1c29189b344d..21ede141018a 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/kernel.h> 9#include <linux/kernel.h>
7#include <linux/stddef.h> 10#include <linux/stddef.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -78,7 +81,7 @@ struct cfcnfg *cfcnfg_create(void)
78 /* Initiate this layer */ 81 /* Initiate this layer */
79 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); 82 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
80 if (!this) { 83 if (!this) {
81		pr_warning("CAIF: %s(): Out of memory\n", __func__);
84		pr_warn("Out of memory\n");
82 return NULL; 85 return NULL;
83 } 86 }
84 this->mux = cfmuxl_create(); 87 this->mux = cfmuxl_create();
@@ -106,7 +109,7 @@ struct cfcnfg *cfcnfg_create(void)
106 layer_set_up(this->ctrl, this); 109 layer_set_up(this->ctrl, this);
107 return this; 110 return this;
108out_of_mem: 111out_of_mem:
109	pr_warning("CAIF: %s(): Out of memory\n", __func__);
112	pr_warn("Out of memory\n");
110 kfree(this->mux); 113 kfree(this->mux);
111 kfree(this->ctrl); 114 kfree(this->ctrl);
112 kfree(this); 115 kfree(this);
@@ -170,18 +173,15 @@ static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
170 return NULL; 173 return NULL;
171} 174}
172 175
173int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
176
177int cfcnfg_get_id_from_ifi(struct cfcnfg *cnfg, int ifi)
174{ 178{
175	int i; 179	int i;
176
177	/* Try to match with specified name */
178	for (i = 0; i < MAX_PHY_LAYERS; i++) {
179		if (cnfg->phy_layers[i].frm_layer != NULL
180		    && strcmp(cnfg->phy_layers[i].phy_layer->name,
181			      name) == 0)
182			return cnfg->phy_layers[i].frm_layer->id;
183	}
184	return 0;
180	for (i = 0; i < MAX_PHY_LAYERS; i++)
181		if (cnfg->phy_layers[i].frm_layer != NULL &&
182		    cnfg->phy_layers[i].ifindex == ifi)
183			return i;
184	return -ENODEV;
185} 185}
186 186
187int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer) 187int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
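cfcnfg_get_id_from_ifi() replaces the name-based lookup with a linear scan over the PHY slots keyed by interface index, and reports failure as -ENODEV instead of the ambiguous 0. The same lookup shape in a self-contained sketch (the array bound and types are illustrative):

    #include <errno.h>

    #define MAX_PHY_LAYERS 7        /* illustrative; the real value lives in cfcnfg.c */

    struct phy_slot {
            void *frm_layer;        /* non-NULL when the slot is in use */
            int ifindex;
    };

    /* Return the slot index for a given interface index, or -ENODEV. */
    static int phy_id_from_ifindex(const struct phy_slot *slots, int ifi)
    {
            for (int i = 0; i < MAX_PHY_LAYERS; i++)
                    if (slots[i].frm_layer != NULL && slots[i].ifindex == ifi)
                            return i;
            return -ENODEV;
    }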
@@ -194,7 +194,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
194 caif_assert(adap_layer != NULL); 194 caif_assert(adap_layer != NULL);
195 channel_id = adap_layer->id; 195 channel_id = adap_layer->id;
196 if (adap_layer->dn == NULL || channel_id == 0) { 196 if (adap_layer->dn == NULL || channel_id == 0) {
197		pr_err("CAIF: %s():adap_layer->id is 0\n", __func__);
197		pr_err("adap_layer->dn == NULL or adap_layer->id is 0\n");
198 ret = -ENOTCONN; 198 ret = -ENOTCONN;
199 goto end; 199 goto end;
200 } 200 }
@@ -204,9 +204,8 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
204 layer_set_up(servl, NULL); 204 layer_set_up(servl, NULL);
205 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); 205 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
206 if (servl == NULL) { 206 if (servl == NULL) {
207		pr_err("CAIF: %s(): PROTOCOL ERROR "
208		       "- Error removing service_layer Channel_Id(%d)",
209		       __func__, channel_id);
207		pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)",
208		       channel_id);
210 ret = -EINVAL; 209 ret = -EINVAL;
211 goto end; 210 goto end;
212 } 211 }
@@ -216,18 +215,14 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
216 215
217 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); 216 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
218 if (phyinfo == NULL) { 217 if (phyinfo == NULL) {
219		pr_warning("CAIF: %s(): "
220			   "No interface to send disconnect to\n",
221			   __func__);
218		pr_warn("No interface to send disconnect to\n");
222 ret = -ENODEV; 219 ret = -ENODEV;
223 goto end; 220 goto end;
224 } 221 }
225 if (phyinfo->id != phyid || 222 if (phyinfo->id != phyid ||
226 phyinfo->phy_layer->id != phyid || 223 phyinfo->phy_layer->id != phyid ||
227 phyinfo->frm_layer->id != phyid) { 224 phyinfo->frm_layer->id != phyid) {
228		pr_err("CAIF: %s(): "
229		       "Inconsistency in phy registration\n",
230		       __func__);
225		pr_err("Inconsistency in phy registration\n");
231 ret = -EINVAL; 226 ret = -EINVAL;
232 goto end; 227 goto end;
233 } 228 }
@@ -276,21 +271,20 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
276{ 271{
277 struct cflayer *frml; 272 struct cflayer *frml;
278 if (adap_layer == NULL) { 273 if (adap_layer == NULL) {
279		pr_err("CAIF: %s(): adap_layer is zero", __func__);
274		pr_err("adap_layer is zero\n");
280 return -EINVAL; 275 return -EINVAL;
281 } 276 }
282 if (adap_layer->receive == NULL) { 277 if (adap_layer->receive == NULL) {
283		pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__);
278		pr_err("adap_layer->receive is NULL\n");
284 return -EINVAL; 279 return -EINVAL;
285 } 280 }
286 if (adap_layer->ctrlcmd == NULL) { 281 if (adap_layer->ctrlcmd == NULL) {
287		pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__);
282		pr_err("adap_layer->ctrlcmd == NULL\n");
288 return -EINVAL; 283 return -EINVAL;
289 } 284 }
290 frml = cnfg->phy_layers[param->phyid].frm_layer; 285 frml = cnfg->phy_layers[param->phyid].frm_layer;
291 if (frml == NULL) { 286 if (frml == NULL) {
292		pr_err("CAIF: %s(): Specified PHY type does not exist!",
293		       __func__);
287		pr_err("Specified PHY type does not exist!\n");
294 return -ENODEV; 288 return -ENODEV;
295 } 289 }
296 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id); 290 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
@@ -330,9 +324,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
330 struct net_device *netdev; 324 struct net_device *netdev;
331 325
332 if (adapt_layer == NULL) { 326 if (adapt_layer == NULL) {
333		pr_debug("CAIF: %s(): link setup response "
334			 "but no client exist, send linkdown back\n",
335			 __func__);
327		pr_debug("link setup response but no client exist, send linkdown back\n");
336 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); 328 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
337 return; 329 return;
338 } 330 }
@@ -374,13 +366,11 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
374 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); 366 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
375 break; 367 break;
376 default: 368 default:
377		pr_err("CAIF: %s(): Protocol error. "
378		       "Link setup response - unknown channel type\n",
379		       __func__);
369		pr_err("Protocol error. Link setup response - unknown channel type\n");
380 return; 370 return;
381 } 371 }
382 if (!servicel) { 372 if (!servicel) {
383		pr_warning("CAIF: %s(): Out of memory\n", __func__);
373		pr_warn("Out of memory\n");
384 return; 374 return;
385 } 375 }
386 layer_set_dn(servicel, cnfg->mux); 376 layer_set_dn(servicel, cnfg->mux);
@@ -418,7 +408,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
418 } 408 }
419 } 409 }
420 if (*phyid == 0) { 410 if (*phyid == 0) {
421		pr_err("CAIF: %s(): No Available PHY ID\n", __func__);
411		pr_err("No Available PHY ID\n");
422 return; 412 return;
423 } 413 }
424 414
@@ -427,7 +417,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
427 phy_driver = 417 phy_driver =
428 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx); 418 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
429 if (!phy_driver) { 419 if (!phy_driver) {
430			pr_warning("CAIF: %s(): Out of memory\n", __func__);
420			pr_warn("Out of memory\n");
431 return; 421 return;
432 } 422 }
433 423
@@ -436,7 +426,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
436 phy_driver = NULL; 426 phy_driver = NULL;
437 break; 427 break;
438 default: 428 default:
439		pr_err("CAIF: %s(): %d", __func__, phy_type);
429		pr_err("%d\n", phy_type);
440 return; 430 return;
441 break; 431 break;
442 } 432 }
@@ -455,7 +445,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
455 phy_layer->type = phy_type; 445 phy_layer->type = phy_type;
456 frml = cffrml_create(*phyid, fcs); 446 frml = cffrml_create(*phyid, fcs);
457 if (!frml) { 447 if (!frml) {
458		pr_warning("CAIF: %s(): Out of memory\n", __func__);
448		pr_warn("Out of memory\n");
459 return; 449 return;
460 } 450 }
461 cnfg->phy_layers[*phyid].frm_layer = frml; 451 cnfg->phy_layers[*phyid].frm_layer = frml;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 563145fdc4c3..3cd8f978e309 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -36,7 +38,7 @@ struct cflayer *cfctrl_create(void)
36 struct cfctrl *this = 38 struct cfctrl *this =
37 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); 39 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
38 if (!this) { 40 if (!this) {
39		pr_warning("CAIF: %s(): Out of memory\n", __func__);
41		pr_warn("Out of memory\n");
40 return NULL; 42 return NULL;
41 } 43 }
42 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 44 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -132,9 +134,7 @@ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
132 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 134 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
133 if (cfctrl_req_eq(req, p)) { 135 if (cfctrl_req_eq(req, p)) {
134 if (p != first) 136 if (p != first)
135				pr_warning("CAIF: %s(): Requests are not "
136					   "received in order\n",
137					   __func__);
137				pr_warn("Requests are not received in order\n");
138 138
139 atomic_set(&ctrl->rsp_seq_no, 139 atomic_set(&ctrl->rsp_seq_no,
140 p->sequence_no); 140 p->sequence_no);
@@ -177,7 +177,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
177 int ret; 177 int ret;
178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
179 if (!pkt) { 179 if (!pkt) {
180		pr_warning("CAIF: %s(): Out of memory\n", __func__);
180		pr_warn("Out of memory\n");
181 return; 181 return;
182 } 182 }
183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -189,8 +189,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
189 ret = 189 ret =
190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
191 if (ret < 0) { 191 if (ret < 0) {
192		pr_err("CAIF: %s(): Could not transmit enum message\n",
193		       __func__);
192		pr_err("Could not transmit enum message\n");
194 cfpkt_destroy(pkt); 193 cfpkt_destroy(pkt);
195 } 194 }
196} 195}
@@ -208,7 +207,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
208 char utility_name[16]; 207 char utility_name[16];
209 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 208 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
210 if (!pkt) { 209 if (!pkt) {
211		pr_warning("CAIF: %s(): Out of memory\n", __func__);
210		pr_warn("Out of memory\n");
212 return -ENOMEM; 211 return -ENOMEM;
213 } 212 }
214 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); 213 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
@@ -253,13 +252,13 @@ int cfctrl_linkup_request(struct cflayer *layer,
253 param->u.utility.paramlen); 252 param->u.utility.paramlen);
254 break; 253 break;
255 default: 254 default:
256		pr_warning("CAIF: %s():Request setup of bad link type = %d\n",
257			   __func__, param->linktype);
255		pr_warn("Request setup of bad link type = %d\n",
256			param->linktype);
258 return -EINVAL; 257 return -EINVAL;
259 } 258 }
260 req = kzalloc(sizeof(*req), GFP_KERNEL); 259 req = kzalloc(sizeof(*req), GFP_KERNEL);
261 if (!req) { 260 if (!req) {
262		pr_warning("CAIF: %s(): Out of memory\n", __func__);
261		pr_warn("Out of memory\n");
263 return -ENOMEM; 262 return -ENOMEM;
264 } 263 }
265 req->client_layer = user_layer; 264 req->client_layer = user_layer;
@@ -276,8 +275,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
276 ret = 275 ret =
277 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 276 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
278 if (ret < 0) { 277 if (ret < 0) {
279		pr_err("CAIF: %s(): Could not transmit linksetup request\n",
280		       __func__);
278		pr_err("Could not transmit linksetup request\n");
281 cfpkt_destroy(pkt); 279 cfpkt_destroy(pkt);
282 return -ENODEV; 280 return -ENODEV;
283 } 281 }
@@ -291,7 +289,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
291 struct cfctrl *cfctrl = container_obj(layer); 289 struct cfctrl *cfctrl = container_obj(layer);
292 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 290 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
293 if (!pkt) { 291 if (!pkt) {
294		pr_warning("CAIF: %s(): Out of memory\n", __func__);
292		pr_warn("Out of memory\n");
295 return -ENOMEM; 293 return -ENOMEM;
296 } 294 }
297 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); 295 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
@@ -300,8 +298,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
300 ret = 298 ret =
301 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 299 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
302 if (ret < 0) { 300 if (ret < 0) {
303		pr_err("CAIF: %s(): Could not transmit link-down request\n",
304		       __func__);
301		pr_err("Could not transmit link-down request\n");
305 cfpkt_destroy(pkt); 302 cfpkt_destroy(pkt);
306 } 303 }
307 return ret; 304 return ret;
@@ -313,7 +310,7 @@ void cfctrl_sleep_req(struct cflayer *layer)
313 struct cfctrl *cfctrl = container_obj(layer); 310 struct cfctrl *cfctrl = container_obj(layer);
314 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 311 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
315 if (!pkt) { 312 if (!pkt) {
316		pr_warning("CAIF: %s(): Out of memory\n", __func__);
313		pr_warn("Out of memory\n");
317 return; 314 return;
318 } 315 }
319 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP); 316 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
@@ -330,7 +327,7 @@ void cfctrl_wake_req(struct cflayer *layer)
330 struct cfctrl *cfctrl = container_obj(layer); 327 struct cfctrl *cfctrl = container_obj(layer);
331 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 328 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
332 if (!pkt) { 329 if (!pkt) {
333		pr_warning("CAIF: %s(): Out of memory\n", __func__);
330		pr_warn("Out of memory\n");
334 return; 331 return;
335 } 332 }
336 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE); 333 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
@@ -347,7 +344,7 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
347 struct cfctrl *cfctrl = container_obj(layer); 344 struct cfctrl *cfctrl = container_obj(layer);
348 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 345 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
349 if (!pkt) { 346 if (!pkt) {
350		pr_warning("CAIF: %s(): Out of memory\n", __func__);
347		pr_warn("Out of memory\n");
351 return; 348 return;
352 } 349 }
353 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON); 350 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
@@ -364,12 +361,10 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
364 struct cfctrl_request_info *p, *tmp; 361 struct cfctrl_request_info *p, *tmp;
365 struct cfctrl *ctrl = container_obj(layr); 362 struct cfctrl *ctrl = container_obj(layr);
366 spin_lock(&ctrl->info_list_lock); 363 spin_lock(&ctrl->info_list_lock);
367 pr_warning("CAIF: %s(): enter\n", __func__);
368 364
369 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 365 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
370 if (p->client_layer == adap_layer) { 366 if (p->client_layer == adap_layer) {
371			pr_warning("CAIF: %s(): cancel req :%d\n", __func__,
372				   p->sequence_no);
367			pr_debug("cancel req :%d\n", p->sequence_no);
373 list_del(&p->list); 368 list_del(&p->list);
374 kfree(p); 369 kfree(p);
375 } 370 }
@@ -520,9 +515,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
520 cfpkt_extr_head(pkt, &param, len); 515 cfpkt_extr_head(pkt, &param, len);
521 break; 516 break;
522 default: 517 default:
523			pr_warning("CAIF: %s(): Request setup "
524				   "- invalid link type (%d)",
525				   __func__, serv);
518			pr_warn("Request setup - invalid link type (%d)\n",
519				serv);
526 goto error; 520 goto error;
527 } 521 }
528 522
@@ -532,9 +526,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
532 526
533 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || 527 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
534 cfpkt_erroneous(pkt)) { 528 cfpkt_erroneous(pkt)) {
535			pr_err("CAIF: %s(): Invalid O/E bit or parse "
536			       "error on CAIF control channel",
537			       __func__);
529			pr_err("Invalid O/E bit or parse error on CAIF control channel\n");
538 cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 530 cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
539 0, 531 0,
540 req ? req->client_layer 532 req ? req->client_layer
@@ -556,8 +548,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
556 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); 548 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
557 break; 549 break;
558 case CFCTRL_CMD_LINK_ERR: 550 case CFCTRL_CMD_LINK_ERR:
559		pr_err("CAIF: %s(): Frame Error Indication received\n",
560		       __func__);
551		pr_err("Frame Error Indication received\n");
561 cfctrl->res.linkerror_ind(); 552 cfctrl->res.linkerror_ind();
562 break; 553 break;
563 case CFCTRL_CMD_ENUM: 554 case CFCTRL_CMD_ENUM:
@@ -576,7 +567,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
576 cfctrl->res.radioset_rsp(); 567 cfctrl->res.radioset_rsp();
577 break; 568 break;
578 default: 569 default:
579		pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__);
570		pr_err("Unrecognized Control Frame\n");
580 goto error; 571 goto error;
581 break; 572 break;
582 } 573 }
@@ -595,8 +586,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
595 case CAIF_CTRLCMD_FLOW_OFF_IND: 586 case CAIF_CTRLCMD_FLOW_OFF_IND:
596 spin_lock(&this->info_list_lock); 587 spin_lock(&this->info_list_lock);
597 if (!list_empty(&this->list)) { 588 if (!list_empty(&this->list)) {
598			pr_debug("CAIF: %s(): Received flow off in "
599				 "control layer", __func__);
589			pr_debug("Received flow off in control layer\n");
600 } 590 }
601 spin_unlock(&this->info_list_lock); 591 spin_unlock(&this->info_list_lock);
602 break; 592 break;
@@ -620,7 +610,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
620 if (!ctrl->loop_linkused[linkid]) 610 if (!ctrl->loop_linkused[linkid])
621 goto found; 611 goto found;
622 spin_unlock(&ctrl->loop_linkid_lock); 612 spin_unlock(&ctrl->loop_linkid_lock);
623	pr_err("CAIF: %s(): Out of link-ids\n", __func__);
613	pr_err("Out of link-ids\n");
624 return -EINVAL; 614 return -EINVAL;
625found: 615found:
626 if (!ctrl->loop_linkused[linkid]) 616 if (!ctrl->loop_linkused[linkid])
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 676648cac8dd..11a2af4c162a 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -4,12 +4,16 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
10#include <net/caif/cfsrvl.h> 12#include <net/caif/cfsrvl.h>
11#include <net/caif/cfpkt.h> 13#include <net/caif/cfpkt.h>
12 14
15#define container_obj(layr) ((struct cfsrvl *) layr)
16
13static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt); 17static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
14static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt); 18static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
15 19
@@ -17,7 +21,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
17{ 21{
18 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 22 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
19 if (!dbg) { 23 if (!dbg) {
20		pr_warning("CAIF: %s(): Out of memory\n", __func__);
24		pr_warn("Out of memory\n");
21 return NULL; 25 return NULL;
22 } 26 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0); 27 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -36,5 +40,17 @@ static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
36 40
37static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt) 41static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
38{ 42{
43 struct cfsrvl *service = container_obj(layr);
44 struct caif_payload_info *info;
45 int ret;
46
47 if (!cfsrvl_ready(service, &ret))
48 return ret;
49
50 /* Add info for MUX-layer to route the packet out */
51 info = cfpkt_info(pkt);
52 info->channel_id = service->layer.id;
53 info->dev_info = &service->dev_info;
54
39 return layr->dn->transmit(layr->dn, pkt); 55 return layr->dn->transmit(layr->dn, pkt);
40} 56}
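cfdbgl_transmit() now follows the same pattern as the other CAIF service layers: refuse to send unless the service is ready, then stamp the packet with the channel id and device info so the MUX layer below can route it. A simplified model of that stamping step (all types here are stand-ins, not the kernel structures):

    struct payload_info { int channel_id; const void *dev_info; };

    struct layer {
            int id;                 /* channel id of this service layer */
            const void *dev_info;   /* physical device this channel uses */
            int (*dn_transmit)(struct payload_info *info);
    };

    static int service_transmit(struct layer *l, struct payload_info *info)
    {
            info->channel_id = l->id;       /* route out on our channel */
            info->dev_info = l->dev_info;   /* ... over our physical device */
            return l->dn_transmit(info);    /* hand down to the MUX */
    }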
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index ed9d53aff280..d3ed264ad6c4 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!dgm) { 30 if (!dgm) {
29		pr_warning("CAIF: %s(): Out of memory\n", __func__);
31		pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -49,14 +51,14 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
49 caif_assert(layr->ctrlcmd != NULL); 51 caif_assert(layr->ctrlcmd != NULL);
50 52
51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 53 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
52		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
54		pr_err("Packet is erroneous!\n");
53 cfpkt_destroy(pkt); 55 cfpkt_destroy(pkt);
54 return -EPROTO; 56 return -EPROTO;
55 } 57 }
56 58
57 if ((cmd & DGM_CMD_BIT) == 0) { 59 if ((cmd & DGM_CMD_BIT) == 0) {
58 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) { 60 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
59			pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
61			pr_err("Packet is erroneous!\n");
60 cfpkt_destroy(pkt); 62 cfpkt_destroy(pkt);
61 return -EPROTO; 63 return -EPROTO;
62 } 64 }
@@ -75,8 +77,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
75 return 0; 77 return 0;
76 default: 78 default:
77 cfpkt_destroy(pkt); 79 cfpkt_destroy(pkt);
78		pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n",
79			__func__, cmd, cmd);
80		pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd);
80 return -EPROTO; 81 return -EPROTO;
81 } 82 }
82} 83}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index e86a4ca3b217..a445043931ae 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -6,6 +6,8 @@
6 * License terms: GNU General Public License (GPL) version 2 6 * License terms: GNU General Public License (GPL) version 2
7 */ 7 */
8 8
9#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
10
9#include <linux/stddef.h> 11#include <linux/stddef.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
@@ -32,7 +34,7 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
32{ 34{
33 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); 35 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
34 if (!this) { 36 if (!this) {
35		pr_warning("CAIF: %s(): Out of memory\n", __func__);
37		pr_warn("Out of memory\n");
36 return NULL; 38 return NULL;
37 } 39 }
38 caif_assert(offsetof(struct cffrml, layer) == 0); 40 caif_assert(offsetof(struct cffrml, layer) == 0);
@@ -83,7 +85,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
83 85
84 if (cfpkt_setlen(pkt, len) < 0) { 86 if (cfpkt_setlen(pkt, len) < 0) {
85 ++cffrml_rcv_error; 87 ++cffrml_rcv_error;
86		pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len);
88		pr_err("Framing length error (%d)\n", len);
87 cfpkt_destroy(pkt); 89 cfpkt_destroy(pkt);
88 return -EPROTO; 90 return -EPROTO;
89 } 91 }
@@ -99,14 +101,14 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
99 cfpkt_add_trail(pkt, &tmp, 2); 101 cfpkt_add_trail(pkt, &tmp, 2);
100 ++cffrml_rcv_error; 102 ++cffrml_rcv_error;
101 ++cffrml_rcv_checsum_error; 103 ++cffrml_rcv_checsum_error;
102			pr_info("CAIF: %s(): Frame checksum error "
103				"(0x%x != 0x%x)\n", __func__, hdrchks, pktchks);
104			pr_info("Frame checksum error (0x%x != 0x%x)\n",
105				hdrchks, pktchks);
104 return -EILSEQ; 106 return -EILSEQ;
105 } 107 }
106 } 108 }
107 if (cfpkt_erroneous(pkt)) { 109 if (cfpkt_erroneous(pkt)) {
108 ++cffrml_rcv_error; 110 ++cffrml_rcv_error;
109		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
111		pr_err("Packet is erroneous!\n");
110 cfpkt_destroy(pkt); 112 cfpkt_destroy(pkt);
111 return -EPROTO; 113 return -EPROTO;
112 } 114 }
@@ -132,7 +134,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
132 cfpkt_add_head(pkt, &tmp, 2); 134 cfpkt_add_head(pkt, &tmp, 2);
133 cfpkt_info(pkt)->hdr_len += 2; 135 cfpkt_info(pkt)->hdr_len += 2;
134 if (cfpkt_erroneous(pkt)) { 136 if (cfpkt_erroneous(pkt)) {
135		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
137		pr_err("Packet is erroneous!\n");
136 return -EPROTO; 138 return -EPROTO;
137 } 139 }
138 ret = layr->dn->transmit(layr->dn, pkt); 140 ret = layr->dn->transmit(layr->dn, pkt);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 80c8d332b258..46f34b2e0478 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/stddef.h> 9#include <linux/stddef.h>
7#include <linux/spinlock.h> 10#include <linux/spinlock.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -190,7 +193,7 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
190 u8 id; 193 u8 id;
191 struct cflayer *up; 194 struct cflayer *up;
192 if (cfpkt_extr_head(pkt, &id, 1) < 0) { 195 if (cfpkt_extr_head(pkt, &id, 1) < 0) {
193		pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__);
196		pr_err("erroneous Caif Packet\n");
194 cfpkt_destroy(pkt); 197 cfpkt_destroy(pkt);
195 return -EPROTO; 198 return -EPROTO;
196 } 199 }
@@ -199,8 +202,8 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
199 up = get_up(muxl, id); 202 up = get_up(muxl, id);
200 spin_unlock(&muxl->receive_lock); 203 spin_unlock(&muxl->receive_lock);
201 if (up == NULL) { 204 if (up == NULL) {
202		pr_info("CAIF: %s():Received data on unknown link ID = %d "
203			"(0x%x) up == NULL", __func__, id, id);
205		pr_info("Received data on unknown link ID = %d (0x%x) up == NULL",
206			id, id);
204 cfpkt_destroy(pkt); 207 cfpkt_destroy(pkt);
205 /* 208 /*
206 * Don't return ERROR, since modem misbehaves and sends out 209 * Don't return ERROR, since modem misbehaves and sends out
@@ -223,9 +226,8 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
223 struct caif_payload_info *info = cfpkt_info(pkt); 226 struct caif_payload_info *info = cfpkt_info(pkt);
224 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); 227 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
225 if (dn == NULL) { 228 if (dn == NULL) {
226		pr_warning("CAIF: %s(): Send data on unknown phy "
227			   "ID = %d (0x%x)\n",
228			   __func__, info->dev_info->id, info->dev_info->id);
229		pr_warn("Send data on unknown phy ID = %d (0x%x)\n",
230			info->dev_info->id, info->dev_info->id);
229 return -ENOTCONN; 231 return -ENOTCONN;
230 } 232 }
231 info->hdr_len += 1; 233 info->hdr_len += 1;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index c49a6695793a..d7e865e2ff65 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/string.h> 9#include <linux/string.h>
8#include <linux/skbuff.h> 10#include <linux/skbuff.h>
9#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -12,11 +14,12 @@
12#define PKT_PREFIX 48 14#define PKT_PREFIX 48
13#define PKT_POSTFIX 2 15#define PKT_POSTFIX 2
14#define PKT_LEN_WHEN_EXTENDING 128 16#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do {	   \
16	cfpkt_priv(pkt)->erronous = true;  \
17	skb_reset_tail_pointer(&pkt->skb); \
18	pr_warning("CAIF: " errmsg);\
19   } while (0)
17#define PKT_ERROR(pkt, errmsg)		   \
18do {					   \
19	cfpkt_priv(pkt)->erronous = true;  \
20	skb_reset_tail_pointer(&pkt->skb); \
21	pr_warn(errmsg);		   \
22} while (0)
20 23
21struct cfpktq { 24struct cfpktq {
22 struct sk_buff_head head; 25 struct sk_buff_head head;
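The rewritten PKT_ERROR keeps the do { ... } while (0) wrapper, which is what makes a multi-statement macro behave as a single statement. The classic reason, demonstrated standalone:

    #include <stdio.h>

    /* Without the wrapper, the two statements would break an unbraced
     * if/else: only the first would be guarded and the else would not parse. */
    #define LOG_AND_COUNT(msg)          \
    do {                                \
            errors++;                   \
            puts(msg);                  \
    } while (0)

    static int errors;

    int main(void)
    {
            int bad = 1;

            if (bad)
                    LOG_AND_COUNT("bad packet");    /* expands to one statement */
            else
                    puts("ok");                     /* else still binds correctly */
            return 0;
    }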
@@ -130,13 +133,13 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
130 return -EPROTO; 133 return -EPROTO;
131 134
132 if (unlikely(len > skb->len)) { 135 if (unlikely(len > skb->len)) {
133		PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n");
136		PKT_ERROR(pkt, "read beyond end of packet\n");
134 return -EPROTO; 137 return -EPROTO;
135 } 138 }
136 139
137 if (unlikely(len > skb_headlen(skb))) { 140 if (unlikely(len > skb_headlen(skb))) {
138 if (unlikely(skb_linearize(skb) != 0)) { 141 if (unlikely(skb_linearize(skb) != 0)) {
139			PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n");
142			PKT_ERROR(pkt, "linearize failed\n");
140 return -EPROTO; 143 return -EPROTO;
141 } 144 }
142 } 145 }
@@ -156,11 +159,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
156 return -EPROTO; 159 return -EPROTO;
157 160
158 if (unlikely(skb_linearize(skb) != 0)) { 161 if (unlikely(skb_linearize(skb) != 0)) {
159		PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n");
162		PKT_ERROR(pkt, "linearize failed\n");
160 return -EPROTO; 163 return -EPROTO;
161 } 164 }
162 if (unlikely(skb->data + len > skb_tail_pointer(skb))) { 165 if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
163		PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n");
166		PKT_ERROR(pkt, "read beyond end of packet\n");
164 return -EPROTO; 167 return -EPROTO;
165 } 168 }
166 from = skb_tail_pointer(skb) - len; 169 from = skb_tail_pointer(skb) - len;
@@ -202,7 +205,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
202 205
203 /* Make sure data is writable */ 206 /* Make sure data is writable */
204 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) { 207 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
205		PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n");
208		PKT_ERROR(pkt, "cow failed\n");
206 return -EPROTO; 209 return -EPROTO;
207 } 210 }
208 /* 211 /*
@@ -211,8 +214,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
211 * lengths of the top SKB. 214 * lengths of the top SKB.
212 */ 215 */
213 if (lastskb != skb) { 216 if (lastskb != skb) {
214		pr_warning("CAIF: %s(): Packet is non-linear\n",
215			   __func__);
217		pr_warn("Packet is non-linear\n");
216 skb->len += len; 218 skb->len += len;
217 skb->data_len += len; 219 skb->data_len += len;
218 } 220 }
@@ -242,14 +244,14 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
242 if (unlikely(is_erronous(pkt))) 244 if (unlikely(is_erronous(pkt)))
243 return -EPROTO; 245 return -EPROTO;
244 if (unlikely(skb_headroom(skb) < len)) { 246 if (unlikely(skb_headroom(skb) < len)) {
245		PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n");
247		PKT_ERROR(pkt, "no headroom\n");
246 return -EPROTO; 248 return -EPROTO;
247 } 249 }
248 250
249 /* Make sure data is writable */ 251 /* Make sure data is writable */
250 ret = skb_cow_data(skb, 0, &lastskb); 252 ret = skb_cow_data(skb, 0, &lastskb);
251 if (unlikely(ret < 0)) { 253 if (unlikely(ret < 0)) {
252		PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n");
254		PKT_ERROR(pkt, "cow failed\n");
253 return ret; 255 return ret;
254 } 256 }
255 257
@@ -283,7 +285,7 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
283 if (unlikely(is_erronous(pkt))) 285 if (unlikely(is_erronous(pkt)))
284 return -EPROTO; 286 return -EPROTO;
285 if (unlikely(skb_linearize(&pkt->skb) != 0)) { 287 if (unlikely(skb_linearize(&pkt->skb) != 0)) {
286		PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n");
288		PKT_ERROR(pkt, "linearize failed\n");
287 return -EPROTO; 289 return -EPROTO;
288 } 290 }
289 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); 291 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
@@ -309,7 +311,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
309 311
310 /* Need to expand SKB */ 312 /* Need to expand SKB */
311 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len))) 313 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len)))
312		PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n");
314		PKT_ERROR(pkt, "skb_pad_trail failed\n");
313 315
314 return cfpkt_getlen(pkt); 316 return cfpkt_getlen(pkt);
315} 317}
@@ -380,8 +382,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
380 return NULL; 382 return NULL;
381 383
382 if (skb->data + pos > skb_tail_pointer(skb)) { 384 if (skb->data + pos > skb_tail_pointer(skb)) {
383 PKT_ERROR(pkt, 385 PKT_ERROR(pkt, "trying to split beyond end of packet\n");
384 "cfpkt_split: trying to split beyond end of packet");
385 return NULL; 386 return NULL;
386 } 387 }
387 388
@@ -455,17 +456,17 @@ int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
455 return -EPROTO; 456 return -EPROTO;
456 /* Make sure SKB is writable */ 457 /* Make sure SKB is writable */
457 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { 458 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
458 PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n"); 459 PKT_ERROR(pkt, "skb_cow_data failed\n");
459 return -EPROTO; 460 return -EPROTO;
460 } 461 }
461 462
462 if (unlikely(skb_linearize(skb) != 0)) { 463 if (unlikely(skb_linearize(skb) != 0)) {
463 PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n"); 464 PKT_ERROR(pkt, "linearize failed\n");
464 return -EPROTO; 465 return -EPROTO;
465 } 466 }
466 467
467 if (unlikely(skb_tailroom(skb) < buflen)) { 468 if (unlikely(skb_tailroom(skb) < buflen)) {
468 PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n"); 469 PKT_ERROR(pkt, "buffer too short - failed\n");
469 return -EPROTO; 470 return -EPROTO;
470 } 471 }
471 472
@@ -483,14 +484,13 @@ int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
483 return -EPROTO; 484 return -EPROTO;
484 485
485 if (unlikely(buflen > skb->len)) { 486 if (unlikely(buflen > skb->len)) {
486 PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large " 487 PKT_ERROR(pkt, "buflen too large - failed\n");
487 "- failed\n");
488 return -EPROTO; 488 return -EPROTO;
489 } 489 }
490 490
491 if (unlikely(buflen > skb_headlen(skb))) { 491 if (unlikely(buflen > skb_headlen(skb))) {
492 if (unlikely(skb_linearize(skb) != 0)) { 492 if (unlikely(skb_linearize(skb) != 0)) {
493 PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n"); 493 PKT_ERROR(pkt, "linearize failed\n");
494 return -EPROTO; 494 return -EPROTO;
495 } 495 }
496 } 496 }
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 9a699242d104..e2fb5fa75795 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
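The single define added here does the work for every message cleanup in this series: the kernel's pr_warn()/pr_err()/pr_debug() macros paste pr_fmt(fmt) in front of the format string, so one per-file define supplies the "modname:%s():" prefix and the __func__ argument, and each call site sheds its hand-rolled "CAIF: %s()" boilerplate (as in the cfpkt_skbuff.c hunks above). A minimal user-space sketch of the same token-pasting trick, with printf standing in for printk and the module name hard-coded:

#include <stdio.h>

/* Stand-in for the kernel's pr_fmt()/pr_warn(); printk becomes printf here. */
#define pr_fmt(fmt) "cfrfml" ":%s(): " fmt, __func__
#define pr_warn(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

static void *create_layer(void)
{
	pr_warn("Out of memory\n");	/* prints "cfrfml:create_layer(): Out of memory" */
	return NULL;
}

int main(void)
{
	create_layer();
	return 0;
}

The define has to appear before the first include: printk.h only installs its pass-through default when the file has not already provided a pr_fmt of its own.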
@@ -48,7 +50,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
48 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); 50 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
49 51
50 if (!this) { 52 if (!this) {
51 pr_warning("CAIF: %s(): Out of memory\n", __func__); 53 pr_warn("Out of memory\n");
52 return NULL; 54 return NULL;
53 } 55 }
54 56
@@ -178,9 +180,7 @@ out:
178 cfpkt_destroy(rfml->incomplete_frm); 180 cfpkt_destroy(rfml->incomplete_frm);
179 rfml->incomplete_frm = NULL; 181 rfml->incomplete_frm = NULL;
180 182
181 pr_info("CAIF: %s(): " 183 pr_info("Connection error %d triggered on RFM link\n", err);
182 "Connection error %d triggered on RFM link\n",
183 __func__, err);
184 184
185 /* Trigger connection error upon failure.*/ 185 /* Trigger connection error upon failure.*/
186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
@@ -193,7 +193,7 @@ out:
193 193
194static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt) 194static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
195{ 195{
196 caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size); 196 caif_assert(cfpkt_getlen(pkt) < rfml->fragment_size);
197 197
198 /* Add info for MUX-layer to route the packet out. */ 198 /* Add info for MUX-layer to route the packet out. */
199 cfpkt_info(pkt)->channel_id = rfml->serv.layer.id; 199 cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
@@ -280,9 +280,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
280out: 280out:
281 281
282 if (err != 0) { 282 if (err != 0) {
283 pr_info("CAIF: %s(): " 283 pr_info("Connection error %d triggered on RFM link\n", err);
284 "Connection error %d triggered on RFM link\n",
285 __func__, err);
286 /* Trigger connection error upon failure.*/ 284 /* Trigger connection error upon failure.*/
287 285
288 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 286 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index a11fbd68a13d..9297f7dea9d8 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -34,7 +36,7 @@ struct cflayer *cfserl_create(int type, int instance, bool use_stx)
34{ 36{
35 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); 37 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
36 if (!this) { 38 if (!this) {
37 pr_warning("CAIF: %s(): Out of memory\n", __func__); 39 pr_warn("Out of memory\n");
38 return NULL; 40 return NULL;
39 } 41 }
40 caif_assert(offsetof(struct cfserl, layer) == 0); 42 caif_assert(offsetof(struct cfserl, layer) == 0);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index f40939a91211..ab5e542526bf 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/errno.h> 11#include <linux/errno.h>
@@ -79,8 +81,7 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
79 layr->up->ctrlcmd(layr->up, ctrl, phyid); 81 layr->up->ctrlcmd(layr->up, ctrl, phyid);
80 break; 82 break;
81 default: 83 default:
82 pr_warning("CAIF: %s(): " 84 pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl);
83 "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
84 /* We have both modem and phy flow on, send flow on */ 85 /* We have both modem and phy flow on, send flow on */
85 layr->up->ctrlcmd(layr->up, ctrl, phyid); 86 layr->up->ctrlcmd(layr->up, ctrl, phyid);
86 service->phy_flow_on = true; 87 service->phy_flow_on = true;
@@ -107,14 +108,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
107 u8 flow_on = SRVL_FLOW_ON; 108 u8 flow_on = SRVL_FLOW_ON;
108 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 109 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
109 if (!pkt) { 110 if (!pkt) {
110 pr_warning("CAIF: %s(): Out of memory\n", 111 pr_warn("Out of memory\n");
111 __func__);
112 return -ENOMEM; 112 return -ENOMEM;
113 } 113 }
114 114
115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { 115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
116 pr_err("CAIF: %s(): Packet is erroneous!\n", 116 pr_err("Packet is erroneous!\n");
117 __func__);
118 cfpkt_destroy(pkt); 117 cfpkt_destroy(pkt);
119 return -EPROTO; 118 return -EPROTO;
120 } 119 }
@@ -131,14 +130,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
131 u8 flow_off = SRVL_FLOW_OFF; 130 u8 flow_off = SRVL_FLOW_OFF;
132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 131 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) { 132 if (!pkt) {
134 pr_warning("CAIF: %s(): Out of memory\n", 133 pr_warn("Out of memory\n");
135 __func__);
136 return -ENOMEM; 134 return -ENOMEM;
137 } 135 }
138 136
139 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { 137 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
140 pr_err("CAIF: %s(): Packet is erroneous!\n", 138 pr_err("Packet is erroneous!\n");
141 __func__);
142 cfpkt_destroy(pkt); 139 cfpkt_destroy(pkt);
143 return -EPROTO; 140 return -EPROTO;
144 } 141 }
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 02795aff57a4..efad410e4c82 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!util) { 30 if (!util) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
47 caif_assert(layr->up->receive != NULL); 49 caif_assert(layr->up->receive != NULL);
48 caif_assert(layr->up->ctrlcmd != NULL); 50 caif_assert(layr->up->ctrlcmd != NULL);
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -64,16 +66,14 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
64 cfpkt_destroy(pkt); 66 cfpkt_destroy(pkt);
65 return 0; 67 return 0;
66 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */ 68 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
67 pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n", 69 pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n");
68 __func__);
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0); 70 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
70 service->open = false; 71 service->open = false;
71 cfpkt_destroy(pkt); 72 cfpkt_destroy(pkt);
72 return 0; 73 return 0;
73 default: 74 default:
74 cfpkt_destroy(pkt); 75 cfpkt_destroy(pkt);
75 pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n", 76 pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd);
76 __func__, cmd, cmd);
77 return -EPROTO; 77 return -EPROTO;
78 } 78 }
79} 79}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 77cc09faac9a..3b425b189a99 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -25,7 +27,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
25{ 27{
26 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 28 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!vei) { 29 if (!vei) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__); 30 pr_warn("Out of memory\n");
29 return NULL; 31 return NULL;
30 } 32 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0); 33 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
47 49
48 50
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -67,8 +69,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
67 cfpkt_destroy(pkt); 69 cfpkt_destroy(pkt);
68 return 0; 70 return 0;
69 default: /* SET RS232 PIN */ 71 default: /* SET RS232 PIN */
70 pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n", 72 pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd);
71 __func__, cmd, cmd);
72 cfpkt_destroy(pkt); 73 cfpkt_destroy(pkt);
73 return -EPROTO; 74 return -EPROTO;
74 } 75 }
@@ -86,7 +87,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
86 caif_assert(layr->dn->transmit != NULL); 87 caif_assert(layr->dn->transmit != NULL);
87 88
88 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 89 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
89 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 90 pr_err("Packet is erroneous!\n");
90 return -EPROTO; 91 return -EPROTO;
91 } 92 }
92 93
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index ada6ee2d48f5..bf6fef2a0eff 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -21,7 +23,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
21{ 23{
22 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 24 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!vid) { 25 if (!vid) {
24 pr_warning("CAIF: %s(): Out of memory\n", __func__); 26 pr_warn("Out of memory\n");
25 return NULL; 27 return NULL;
26 } 28 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0); 29 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -38,7 +40,7 @@ static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
38{ 40{
39 u32 videoheader; 41 u32 videoheader;
40 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) { 42 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
41 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 43 pr_err("Packet is erroneous!\n");
42 cfpkt_destroy(pkt); 44 cfpkt_destroy(pkt);
43 return -EPROTO; 45 return -EPROTO;
44 } 46 }
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 4293e190ec53..84a422c98941 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -5,6 +5,8 @@
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
9
8#include <linux/version.h> 10#include <linux/version.h>
9#include <linux/fs.h> 11#include <linux/fs.h>
10#include <linux/init.h> 12#include <linux/init.h>
@@ -28,9 +30,6 @@
28#define CONNECT_TIMEOUT (5 * HZ) 30#define CONNECT_TIMEOUT (5 * HZ)
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500 31#define CAIF_NET_DEFAULT_QUEUE_LEN 500
30 32
31#undef pr_debug
32#define pr_debug pr_warning
33
34/*This list is protected by the rtnl lock. */ 33/*This list is protected by the rtnl lock. */
35static LIST_HEAD(chnl_net_list); 34static LIST_HEAD(chnl_net_list);
36 35
@@ -142,8 +141,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
142 int phyid) 141 int phyid)
143{ 142{
144 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); 143 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
145 pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n", 144 pr_debug("NET flowctrl func called flow: %s\n",
146 __func__,
147 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : 145 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
148 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : 146 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
149 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : 147 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
@@ -196,12 +194,12 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
196 priv = netdev_priv(dev); 194 priv = netdev_priv(dev);
197 195
198 if (skb->len > priv->netdev->mtu) { 196 if (skb->len > priv->netdev->mtu) {
199 pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__); 197 pr_warn("Size of skb exceeded MTU\n");
200 return -ENOSPC; 198 return -ENOSPC;
201 } 199 }
202 200
203 if (!priv->flowenabled) { 201 if (!priv->flowenabled) {
204 pr_debug("CAIF: %s(): dropping packets flow off\n", __func__); 202 pr_debug("dropping packets flow off\n");
205 return NETDEV_TX_BUSY; 203 return NETDEV_TX_BUSY;
206 } 204 }
207 205
@@ -237,7 +235,7 @@ static int chnl_net_open(struct net_device *dev)
237 ASSERT_RTNL(); 235 ASSERT_RTNL();
238 priv = netdev_priv(dev); 236 priv = netdev_priv(dev);
239 if (!priv) { 237 if (!priv) {
240 pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); 238 pr_debug("chnl_net_open: no priv\n");
241 return -ENODEV; 239 return -ENODEV;
242 } 240 }
243 241
@@ -246,18 +244,17 @@ static int chnl_net_open(struct net_device *dev)
246 result = caif_connect_client(&priv->conn_req, &priv->chnl, 244 result = caif_connect_client(&priv->conn_req, &priv->chnl,
247 &llifindex, &headroom, &tailroom); 245 &llifindex, &headroom, &tailroom);
248 if (result != 0) { 246 if (result != 0) {
249 pr_debug("CAIF: %s(): err: " 247 pr_debug("err: "
250 "Unable to register and open device," 248 "Unable to register and open device,"
251 " Err:%d\n", 249 " Err:%d\n",
252 __func__, 250 result);
253 result);
254 goto error; 251 goto error;
255 } 252 }
256 253
257 lldev = dev_get_by_index(dev_net(dev), llifindex); 254 lldev = dev_get_by_index(dev_net(dev), llifindex);
258 255
259 if (lldev == NULL) { 256 if (lldev == NULL) {
260 pr_debug("CAIF: %s(): no interface?\n", __func__); 257 pr_debug("no interface?\n");
261 result = -ENODEV; 258 result = -ENODEV;
262 goto error; 259 goto error;
263 } 260 }
@@ -279,9 +276,7 @@ static int chnl_net_open(struct net_device *dev)
279 dev_put(lldev); 276 dev_put(lldev);
280 277
281 if (mtu < 100) { 278 if (mtu < 100) {
282 pr_warning("CAIF: %s(): " 279 pr_warn("CAIF Interface MTU too small (%d)\n", mtu);
283 "CAIF Interface MTU too small (%d)\n",
284 __func__, mtu);
285 result = -ENODEV; 280 result = -ENODEV;
286 goto error; 281 goto error;
287 } 282 }
@@ -296,33 +291,32 @@ static int chnl_net_open(struct net_device *dev)
296 rtnl_lock(); 291 rtnl_lock();
297 292
298 if (result == -ERESTARTSYS) { 293 if (result == -ERESTARTSYS) {
299 pr_debug("CAIF: %s(): wait_event_interruptible" 294 pr_debug("wait_event_interruptible woken by a signal\n");
300 " woken by a signal\n", __func__);
301 result = -ERESTARTSYS; 295 result = -ERESTARTSYS;
302 goto error; 296 goto error;
303 } 297 }
304 298
305 if (result == 0) { 299 if (result == 0) {
306 pr_debug("CAIF: %s(): connect timeout\n", __func__); 300 pr_debug("connect timeout\n");
307 caif_disconnect_client(&priv->chnl); 301 caif_disconnect_client(&priv->chnl);
308 priv->state = CAIF_DISCONNECTED; 302 priv->state = CAIF_DISCONNECTED;
309 pr_debug("CAIF: %s(): state disconnected\n", __func__); 303 pr_debug("state disconnected\n");
310 result = -ETIMEDOUT; 304 result = -ETIMEDOUT;
311 goto error; 305 goto error;
312 } 306 }
313 307
314 if (priv->state != CAIF_CONNECTED) { 308 if (priv->state != CAIF_CONNECTED) {
315 pr_debug("CAIF: %s(): connect failed\n", __func__); 309 pr_debug("connect failed\n");
316 result = -ECONNREFUSED; 310 result = -ECONNREFUSED;
317 goto error; 311 goto error;
318 } 312 }
319 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); 313 pr_debug("CAIF Netdevice connected\n");
320 return 0; 314 return 0;
321 315
322error: 316error:
323 caif_disconnect_client(&priv->chnl); 317 caif_disconnect_client(&priv->chnl);
324 priv->state = CAIF_DISCONNECTED; 318 priv->state = CAIF_DISCONNECTED;
325 pr_debug("CAIF: %s(): state disconnected\n", __func__); 319 pr_debug("state disconnected\n");
326 return result; 320 return result;
327 321
328} 322}
@@ -413,7 +407,7 @@ static void caif_netlink_parms(struct nlattr *data[],
413 struct caif_connect_request *conn_req) 407 struct caif_connect_request *conn_req)
414{ 408{
415 if (!data) { 409 if (!data) {
416 pr_warning("CAIF: %s: no params data found\n", __func__); 410 pr_warn("no params data found\n");
417 return; 411 return;
418 } 412 }
419 if (data[IFLA_CAIF_IPV4_CONNID]) 413 if (data[IFLA_CAIF_IPV4_CONNID])
@@ -442,8 +436,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
442 436
443 ret = register_netdevice(dev); 437 ret = register_netdevice(dev);
444 if (ret) 438 if (ret)
445 pr_warning("CAIF: %s(): device rtnl registration failed\n", 439 pr_warn("device rtnl registration failed\n");
446 __func__);
447 return ret; 440 return ret;
448} 441}
449 442
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 08ffe9e4be20..6faa8256e10c 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
125 struct list_head tx_ops; 125 struct list_head tx_ops;
126 unsigned long dropped_usr_msgs; 126 unsigned long dropped_usr_msgs;
127 struct proc_dir_entry *bcm_proc_read; 127 struct proc_dir_entry *bcm_proc_read;
128 char procname [9]; /* pointer printed in ASCII with \0 */ 128 char procname [20]; /* pointer printed in ASCII with \0 */
129}; 129};
130 130
131static inline struct bcm_sock *bcm_sk(const struct sock *sk) 131static inline struct bcm_sock *bcm_sk(const struct sock *sk)
diff --git a/net/can/raw.c b/net/can/raw.c
index a10e3338f084..e88f610fdb7b 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -90,23 +90,39 @@ struct raw_sock {
90 can_err_mask_t err_mask; 90 can_err_mask_t err_mask;
91}; 91};
92 92
93/*
94 * Return pointer to store the extra msg flags for raw_recvmsg().
95 * We use the space of one unsigned int beyond the 'struct sockaddr_can'
96 * in skb->cb.
97 */
98static inline unsigned int *raw_flags(struct sk_buff *skb)
99{
100 BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) +
101 sizeof(unsigned int)));
102
103 /* return pointer after struct sockaddr_can */
104 return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
105}
106
93static inline struct raw_sock *raw_sk(const struct sock *sk) 107static inline struct raw_sock *raw_sk(const struct sock *sk)
94{ 108{
95 return (struct raw_sock *)sk; 109 return (struct raw_sock *)sk;
96} 110}
97 111
98static void raw_rcv(struct sk_buff *skb, void *data) 112static void raw_rcv(struct sk_buff *oskb, void *data)
99{ 113{
100 struct sock *sk = (struct sock *)data; 114 struct sock *sk = (struct sock *)data;
101 struct raw_sock *ro = raw_sk(sk); 115 struct raw_sock *ro = raw_sk(sk);
102 struct sockaddr_can *addr; 116 struct sockaddr_can *addr;
117 struct sk_buff *skb;
118 unsigned int *pflags;
103 119
104 /* check the received tx sock reference */ 120 /* check the received tx sock reference */
105 if (!ro->recv_own_msgs && skb->sk == sk) 121 if (!ro->recv_own_msgs && oskb->sk == sk)
106 return; 122 return;
107 123
108 /* clone the given skb to be able to enqueue it into the rcv queue */ 124 /* clone the given skb to be able to enqueue it into the rcv queue */
109 skb = skb_clone(skb, GFP_ATOMIC); 125 skb = skb_clone(oskb, GFP_ATOMIC);
110 if (!skb) 126 if (!skb)
111 return; 127 return;
112 128
@@ -123,6 +139,14 @@ static void raw_rcv(struct sk_buff *skb, void *data)
123 addr->can_family = AF_CAN; 139 addr->can_family = AF_CAN;
124 addr->can_ifindex = skb->dev->ifindex; 140 addr->can_ifindex = skb->dev->ifindex;
125 141
142 /* add CAN specific message flags for raw_recvmsg() */
143 pflags = raw_flags(skb);
144 *pflags = 0;
145 if (oskb->sk)
146 *pflags |= MSG_DONTROUTE;
147 if (oskb->sk == sk)
148 *pflags |= MSG_CONFIRM;
149
126 if (sock_queue_rcv_skb(sk, skb) < 0) 150 if (sock_queue_rcv_skb(sk, skb) < 0)
127 kfree_skb(skb); 151 kfree_skb(skb);
128} 152}
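The raw_flags() helper parks a scratch word in skb->cb directly behind the struct sockaddr_can that raw_recvmsg() later copies out as msg_name; the BUILD_BUG_ON proves at compile time that both fit in the 48-byte control buffer. A user-space model of that fixed-buffer layout (struct sizes and names here are stand-ins, not the kernel's):

#include <stdio.h>
#include <string.h>

struct sockaddr_can_stub { int family; int ifindex; };	/* stand-in */

struct skb_stub { char cb[48]; };	/* control buffer, as in struct sk_buff */

static unsigned int *raw_flags(struct skb_stub *skb)
{
	/* the kernel uses BUILD_BUG_ON(); a C11 static assert plays its role */
	_Static_assert(sizeof(((struct skb_stub *)0)->cb) >
		       sizeof(struct sockaddr_can_stub) + sizeof(unsigned int),
		       "cb too small");
	/* the flags word lives right after the address struct */
	return (unsigned int *)(&((struct sockaddr_can_stub *)skb->cb)[1]);
}

int main(void)
{
	struct skb_stub skb;
	memset(&skb, 0, sizeof(skb));
	*raw_flags(&skb) = 0x4;			/* e.g. MSG_DONTROUTE */
	printf("flags=%#x\n", *raw_flags(&skb));
	return 0;
}

The hunk further down ("assign the flags that have been recorded in raw_rcv()") is the consumer side: recvmsg ORs the stored word into msg->msg_flags.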
@@ -647,12 +671,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 671 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
648 if (err < 0) 672 if (err < 0)
649 goto free_skb; 673 goto free_skb;
650 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 674 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
651 if (err < 0) 675 if (err < 0)
652 goto free_skb; 676 goto free_skb;
653 677
654 /* to be able to check the received tx sock reference in raw_rcv() */ 678 /* to be able to check the received tx sock reference in raw_rcv() */
655 skb_tx(skb)->prevent_sk_orphan = 1; 679 skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
656 680
657 skb->dev = dev; 681 skb->dev = dev;
658 skb->sk = sk; 682 skb->sk = sk;
@@ -707,6 +731,9 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
707 memcpy(msg->msg_name, skb->cb, msg->msg_namelen); 731 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
708 } 732 }
709 733
734 /* assign the flags that have been recorded in raw_rcv() */
735 msg->msg_flags |= *(raw_flags(skb));
736
710 skb_free_datagram(sk, skb); 737 skb_free_datagram(sk, skb);
711 738
712 return size; 739 return size;
diff --git a/net/compat.c b/net/compat.c
index 63d260e81472..3649d5895361 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -41,10 +41,12 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
41 compat_size_t len; 41 compat_size_t len;
42 42
43 if (get_user(len, &uiov32->iov_len) || 43 if (get_user(len, &uiov32->iov_len) ||
44 get_user(buf, &uiov32->iov_base)) { 44 get_user(buf, &uiov32->iov_base))
45 tot_len = -EFAULT; 45 return -EFAULT;
46 break; 46
47 } 47 if (len > INT_MAX - tot_len)
48 len = INT_MAX - tot_len;
49
48 tot_len += len; 50 tot_len += len;
49 kiov->iov_base = compat_ptr(buf); 51 kiov->iov_base = compat_ptr(buf);
50 kiov->iov_len = (__kernel_size_t) len; 52 kiov->iov_len = (__kernel_size_t) len;
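Two behaviour changes hide in this small hunk: a fault while reading the compat iovec now fails the whole call with -EFAULT instead of silently truncating, and the running total is clamped so that summing attacker-controlled iov_len values can never wrap past INT_MAX. A user-space sketch of the saturating sum (types simplified to unsigned int):

#include <limits.h>
#include <stdio.h>

/* Sum iovec lengths, saturating at INT_MAX as the patched kernel does. */
static int total_iov_len(const unsigned int *lens, int n)
{
	int tot_len = 0;

	for (int i = 0; i < n; i++) {
		unsigned int len = lens[i];

		if (len > (unsigned int)(INT_MAX - tot_len))
			len = INT_MAX - tot_len;	/* clamp, don't wrap */
		tot_len += len;
	}
	return tot_len;
}

int main(void)
{
	unsigned int lens[] = { 0x7fffffff, 0x7fffffff };
	printf("%d\n", total_iov_len(lens, 2));	/* INT_MAX, not a negative wrap */
	return 0;
}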
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 282806ba7a57..cd1e039c8755 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -747,13 +747,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
747 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 747 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
748 mask |= POLLERR; 748 mask |= POLLERR;
749 if (sk->sk_shutdown & RCV_SHUTDOWN) 749 if (sk->sk_shutdown & RCV_SHUTDOWN)
750 mask |= POLLRDHUP; 750 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
751 if (sk->sk_shutdown == SHUTDOWN_MASK) 751 if (sk->sk_shutdown == SHUTDOWN_MASK)
752 mask |= POLLHUP; 752 mask |= POLLHUP;
753 753
754 /* readable? */ 754 /* readable? */
755 if (!skb_queue_empty(&sk->sk_receive_queue) || 755 if (!skb_queue_empty(&sk->sk_receive_queue))
756 (sk->sk_shutdown & RCV_SHUTDOWN))
757 mask |= POLLIN | POLLRDNORM; 756 mask |= POLLIN | POLLRDNORM;
758 757
759 /* Connection-based need to check for termination and startup */ 758 /* Connection-based need to check for termination and startup */
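The datagram_poll() change folds "readable because the peer shut down" into the RCV_SHUTDOWN branch: a half-closed socket now reports POLLIN | POLLRDNORM alongside POLLRDHUP even with an empty queue, so a poller learns that read() will return 0 without blocking. A sketch of the resulting mask logic (queue state faked with a bool):

#include <poll.h>
#include <stdbool.h>
#include <stdio.h>

#ifndef POLLRDHUP
#define POLLRDHUP 0x2000	/* glibc hides it without _GNU_SOURCE */
#endif

static short datagram_mask(bool rx_shutdown, bool queue_empty)
{
	short mask = 0;

	if (rx_shutdown)		/* peer closed its sending side */
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (!queue_empty)		/* data actually queued */
		mask |= POLLIN | POLLRDNORM;
	return mask;
}

int main(void)
{
	/* readable even with an empty queue: read() returns 0 immediately */
	printf("%#x\n", datagram_mask(true, true));
	return 0;
}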
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ec85e27beed..0dd54a69dace 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
131#include <trace/events/net.h> 131#include <trace/events/net.h>
132#include <trace/events/skb.h> 132#include <trace/events/skb.h>
133#include <linux/pci.h> 133#include <linux/pci.h>
134#include <linux/inetdevice.h>
134 135
135#include "net-sysfs.h" 136#include "net-sysfs.h"
136 137
@@ -373,6 +374,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
373 * --ANK (980803) 374 * --ANK (980803)
374 */ 375 */
375 376
377static inline struct list_head *ptype_head(const struct packet_type *pt)
378{
379 if (pt->type == htons(ETH_P_ALL))
380 return &ptype_all;
381 else
382 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
383}
384
376/** 385/**
377 * dev_add_pack - add packet handler 386 * dev_add_pack - add packet handler
378 * @pt: packet type declaration 387 * @pt: packet type declaration
@@ -388,16 +397,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
388 397
389void dev_add_pack(struct packet_type *pt) 398void dev_add_pack(struct packet_type *pt)
390{ 399{
391 int hash; 400 struct list_head *head = ptype_head(pt);
392 401
393 spin_lock_bh(&ptype_lock); 402 spin_lock(&ptype_lock);
394 if (pt->type == htons(ETH_P_ALL)) 403 list_add_rcu(&pt->list, head);
395 list_add_rcu(&pt->list, &ptype_all); 404 spin_unlock(&ptype_lock);
396 else {
397 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
398 list_add_rcu(&pt->list, &ptype_base[hash]);
399 }
400 spin_unlock_bh(&ptype_lock);
401} 405}
402EXPORT_SYMBOL(dev_add_pack); 406EXPORT_SYMBOL(dev_add_pack);
403 407
@@ -416,15 +420,10 @@ EXPORT_SYMBOL(dev_add_pack);
416 */ 420 */
417void __dev_remove_pack(struct packet_type *pt) 421void __dev_remove_pack(struct packet_type *pt)
418{ 422{
419 struct list_head *head; 423 struct list_head *head = ptype_head(pt);
420 struct packet_type *pt1; 424 struct packet_type *pt1;
421 425
422 spin_lock_bh(&ptype_lock); 426 spin_lock(&ptype_lock);
423
424 if (pt->type == htons(ETH_P_ALL))
425 head = &ptype_all;
426 else
427 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
428 427
429 list_for_each_entry(pt1, head, list) { 428 list_for_each_entry(pt1, head, list) {
430 if (pt == pt1) { 429 if (pt == pt1) {
@@ -435,7 +434,7 @@ void __dev_remove_pack(struct packet_type *pt)
435 434
436 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 435 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
437out: 436out:
438 spin_unlock_bh(&ptype_lock); 437 spin_unlock(&ptype_lock);
439} 438}
440EXPORT_SYMBOL(__dev_remove_pack); 439EXPORT_SYMBOL(__dev_remove_pack);
441 440
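ptype_head() centralises the bucket choice that dev_add_pack() and __dev_remove_pack() each open-coded: ETH_P_ALL taps share one global list, everything else hashes into ptype_base by the low bits of the host-order protocol number. The _bh suffix on ptype_lock can be dropped because these are writer-side paths; readers traverse the lists under RCU. A sketch of the bucket arithmetic (16-entry table as in the kernel):

#include <arpa/inet.h>
#include <stdio.h>

#define PTYPE_HASH_SIZE 16
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
#define ETH_P_ALL 0x0003
#define ETH_P_IP  0x0800

/* Return the index of the list a packet type lands on; -1 means ptype_all. */
static int ptype_bucket(unsigned short type_be)
{
	if (type_be == htons(ETH_P_ALL))
		return -1;
	return ntohs(type_be) & PTYPE_HASH_MASK;
}

int main(void)
{
	printf("ETH_P_IP -> bucket %d\n", ptype_bucket(htons(ETH_P_IP)));	/* 0x800 & 0xf = 0 */
	return 0;
}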
@@ -1486,8 +1485,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1486 skb_orphan(skb); 1485 skb_orphan(skb);
1487 nf_reset(skb); 1486 nf_reset(skb);
1488 1487
1489 if (!(dev->flags & IFF_UP) || 1488 if (unlikely(!(dev->flags & IFF_UP) ||
1490 (skb->len > (dev->mtu + dev->hard_header_len))) { 1489 (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
1490 atomic_long_inc(&dev->rx_dropped);
1491 kfree_skb(skb); 1491 kfree_skb(skb);
1492 return NET_RX_DROP; 1492 return NET_RX_DROP;
1493 } 1493 }
@@ -1555,21 +1555,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1555 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 1555 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1556 * greater than real_num_tx_queues stale skbs on the qdisc must be flushed. 1556 * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
1557 */ 1557 */
1558void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) 1558int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1559{ 1559{
1560 unsigned int real_num = dev->real_num_tx_queues; 1560 if (txq < 1 || txq > dev->num_tx_queues)
1561 return -EINVAL;
1561 1562
1562 if (unlikely(txq > dev->num_tx_queues)) 1563 if (dev->reg_state == NETREG_REGISTERED) {
1563 ; 1564 ASSERT_RTNL();
1564 else if (txq > real_num) 1565
1565 dev->real_num_tx_queues = txq; 1566 if (txq < dev->real_num_tx_queues)
1566 else if (txq < real_num) { 1567 qdisc_reset_all_tx_gt(dev, txq);
1567 dev->real_num_tx_queues = txq;
1568 qdisc_reset_all_tx_gt(dev, txq);
1569 } 1568 }
1569
1570 dev->real_num_tx_queues = txq;
1571 return 0;
1570} 1572}
1571EXPORT_SYMBOL(netif_set_real_num_tx_queues); 1573EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1572 1574
1575#ifdef CONFIG_RPS
1576/**
1577 * netif_set_real_num_rx_queues - set actual number of RX queues used
1578 * @dev: Network device
1579 * @rxq: Actual number of RX queues
1580 *
1581 * This must be called either with the rtnl_lock held or before
1582 * registration of the net device. Returns 0 on success, or a
1583 * negative error code. If called before registration, it always
1584 * succeeds.
1585 */
1586int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1587{
1588 int rc;
1589
1590 if (rxq < 1 || rxq > dev->num_rx_queues)
1591 return -EINVAL;
1592
1593 if (dev->reg_state == NETREG_REGISTERED) {
1594 ASSERT_RTNL();
1595
1596 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1597 rxq);
1598 if (rc)
1599 return rc;
1600 }
1601
1602 dev->real_num_rx_queues = rxq;
1603 return 0;
1604}
1605EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1606#endif
1607
1573static inline void __netif_reschedule(struct Qdisc *q) 1608static inline void __netif_reschedule(struct Qdisc *q)
1574{ 1609{
1575 struct softnet_data *sd; 1610 struct softnet_data *sd;
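netif_set_real_num_tx_queues() now validates and can fail, and the new RX-side twin follows the same shape: reject counts outside [1, allocated maximum], and if the device is already registered, hold the rtnl lock and do the shrink-side cleanup (flushing qdiscs past the new TX count, trimming RX sysfs kobjects) before committing. A sketch of that validate-then-commit pattern for the TX side (locking and sysfs elided; -22 stands for -EINVAL):

#include <stdio.h>

struct dev_stub {
	unsigned int num_tx_queues;		/* allocated at device creation */
	unsigned int real_num_tx_queues;	/* currently in use */
	int registered;
};

static int set_real_num_tx(struct dev_stub *dev, unsigned int txq)
{
	if (txq < 1 || txq > dev->num_tx_queues)
		return -22;			/* -EINVAL, instead of silently ignoring */

	if (dev->registered && txq < dev->real_num_tx_queues)
		/* kernel: qdisc_reset_all_tx_gt(dev, txq) flushes stale queues */
		printf("flushing qdiscs on queues >= %u\n", txq);

	dev->real_num_tx_queues = txq;
	return 0;
}

int main(void)
{
	struct dev_stub d = { .num_tx_queues = 8, .real_num_tx_queues = 8, .registered = 1 };
	printf("rc=%d\n", set_real_num_tx(&d, 4));
	return 0;
}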
@@ -1650,10 +1685,10 @@ EXPORT_SYMBOL(netif_device_attach);
1650 1685
1651static bool can_checksum_protocol(unsigned long features, __be16 protocol) 1686static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1652{ 1687{
1653 return ((features & NETIF_F_GEN_CSUM) || 1688 return ((features & NETIF_F_NO_CSUM) ||
1654 ((features & NETIF_F_IP_CSUM) && 1689 ((features & NETIF_F_V4_CSUM) &&
1655 protocol == htons(ETH_P_IP)) || 1690 protocol == htons(ETH_P_IP)) ||
1656 ((features & NETIF_F_IPV6_CSUM) && 1691 ((features & NETIF_F_V6_CSUM) &&
1657 protocol == htons(ETH_P_IPV6)) || 1692 protocol == htons(ETH_P_IPV6)) ||
1658 ((features & NETIF_F_FCOE_CRC) && 1693 ((features & NETIF_F_FCOE_CRC) &&
1659 protocol == htons(ETH_P_FCOE))); 1694 protocol == htons(ETH_P_FCOE)));
@@ -1661,17 +1696,18 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1661 1696
1662static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) 1697static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1663{ 1698{
1664 if (can_checksum_protocol(dev->features, skb->protocol)) 1699 __be16 protocol = skb->protocol;
1665 return true; 1700 int features = dev->features;
1666 1701
1667 if (skb->protocol == htons(ETH_P_8021Q)) { 1702 if (vlan_tx_tag_present(skb)) {
1703 features &= dev->vlan_features;
1704 } else if (protocol == htons(ETH_P_8021Q)) {
1668 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 1705 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1669 if (can_checksum_protocol(dev->features & dev->vlan_features, 1706 protocol = veh->h_vlan_encapsulated_proto;
1670 veh->h_vlan_encapsulated_proto)) 1707 features &= dev->vlan_features;
1671 return true;
1672 } 1708 }
1673 1709
1674 return false; 1710 return can_checksum_protocol(features, protocol);
1675} 1711}
1676 1712
1677/** 1713/**
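dev_can_checksum() is restructured so the feature mask is chosen once up front: a frame carrying a hardware-accelerated tag (vlan_tx_tag_present) or an in-band 802.1Q header must be judged against dev->vlan_features, and for the in-band case the encapsulated protocol is what counts. A sketch of that mask/protocol selection (feature bits and the inner-protocol lookup are illustrative stand-ins, not the kernel's values):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative feature bits; the real ones live in linux/netdevice.h. */
#define F_NO_CSUM  0x1
#define F_V4_CSUM  0x2
#define F_V6_CSUM  0x4
#define P_IP    0x0800
#define P_IPV6  0x86dd
#define P_8021Q 0x8100

static bool can_checksum(int features, int proto)
{
	return (features & F_NO_CSUM) ||
	       ((features & F_V4_CSUM) && proto == P_IP) ||
	       ((features & F_V6_CSUM) && proto == P_IPV6);
}

/* Pick mask and protocol the way the patched dev_can_checksum() does. */
static bool dev_can_checksum(int dev_features, int vlan_features,
			     int proto, bool hw_vlan_tag)
{
	int features = dev_features;

	if (hw_vlan_tag || proto == P_8021Q) {
		features &= vlan_features;
		if (proto == P_8021Q)
			proto = P_IP;	/* stand-in: read h_vlan_encapsulated_proto */
	}
	return can_checksum(features, proto);
}

int main(void)
{
	printf("%d\n", dev_can_checksum(F_V4_CSUM, F_V4_CSUM, P_IP, true));
	return 0;
}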
@@ -1760,6 +1796,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1760 __be16 type = skb->protocol; 1796 __be16 type = skb->protocol;
1761 int err; 1797 int err;
1762 1798
1799 if (type == htons(ETH_P_8021Q)) {
1800 struct vlan_ethhdr *veh;
1801
1802 if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
1803 return ERR_PTR(-EINVAL);
1804
1805 veh = (struct vlan_ethhdr *)skb->data;
1806 type = veh->h_vlan_encapsulated_proto;
1807 }
1808
1763 skb_reset_mac_header(skb); 1809 skb_reset_mac_header(skb);
1764 skb->mac_len = skb->network_header - skb->mac_header; 1810 skb->mac_len = skb->network_header - skb->mac_header;
1765 __skb_pull(skb, skb->mac_len); 1811 __skb_pull(skb, skb->mac_len);
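skb_gso_segment() gets matching VLAN awareness: for an 802.1Q outer type it pulls the tagged header into the linear area and dispatches on the encapsulated protocol, so software-tagged frames are segmented by the right protocol handler. A sketch of peeking past the 4-byte tag (offsets follow the 802.1Q layout; error handling omitted):

#include <stdint.h>
#include <stdio.h>

#define VLAN_ETH_HLEN 18	/* Ethernet header + 4-byte 802.1Q tag */

/* Return the protocol GSO should segment for; data points at the MAC header. */
static uint16_t gso_type(const uint8_t *data, uint16_t outer_type)
{
	if (outer_type == 0x8100)	/* ETH_P_8021Q */
		/* encapsulated proto sits in the last 2 bytes of the tag */
		return (uint16_t)(data[16] << 8 | data[17]);
	return outer_type;
}

int main(void)
{
	uint8_t frame[VLAN_ETH_HLEN] = { 0 };
	frame[16] = 0x08; frame[17] = 0x00;	/* inner ETH_P_IP */
	printf("%#x\n", gso_type(frame, 0x8100));
	return 0;
}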
@@ -1904,14 +1950,14 @@ static int dev_gso_segment(struct sk_buff *skb)
1904 1950
1905/* 1951/*
1906 * Try to orphan skb early, right before transmission by the device. 1952 * Try to orphan skb early, right before transmission by the device.
1907 * We cannot orphan skb if tx timestamp is requested, since 1953 * We cannot orphan skb if tx timestamp is requested or the sk-reference
1908 * drivers need to call skb_tstamp_tx() to send the timestamp. 1954 * is needed on driver level for other reasons, e.g. see net/can/raw.c
1909 */ 1955 */
1910static inline void skb_orphan_try(struct sk_buff *skb) 1956static inline void skb_orphan_try(struct sk_buff *skb)
1911{ 1957{
1912 struct sock *sk = skb->sk; 1958 struct sock *sk = skb->sk;
1913 1959
1914 if (sk && !skb_tx(skb)->flags) { 1960 if (sk && !skb_shinfo(skb)->tx_flags) {
1915 /* skb_tx_hash() won't be able to get sk. 1961 /* skb_tx_hash() won't be able to get sk.
1916 * We copy sk_hash into skb->rxhash 1962 * We copy sk_hash into skb->rxhash
1917 */ 1963 */
@@ -1931,9 +1977,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
1931static inline int skb_needs_linearize(struct sk_buff *skb, 1977static inline int skb_needs_linearize(struct sk_buff *skb,
1932 struct net_device *dev) 1978 struct net_device *dev)
1933{ 1979{
1980 int features = dev->features;
1981
1982 if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
1983 features &= dev->vlan_features;
1984
1934 return skb_is_nonlinear(skb) && 1985 return skb_is_nonlinear(skb) &&
1935 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || 1986 ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
1936 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || 1987 (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
1937 illegal_highdma(dev, skb)))); 1988 illegal_highdma(dev, skb))));
1938} 1989}
1939 1990
@@ -1956,6 +2007,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1956 2007
1957 skb_orphan_try(skb); 2008 skb_orphan_try(skb);
1958 2009
2010 if (vlan_tx_tag_present(skb) &&
2011 !(dev->features & NETIF_F_HW_VLAN_TX)) {
2012 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2013 if (unlikely(!skb))
2014 goto out;
2015
2016 skb->vlan_tci = 0;
2017 }
2018
1959 if (netif_needs_gso(dev, skb)) { 2019 if (netif_needs_gso(dev, skb)) {
1960 if (unlikely(dev_gso_segment(skb))) 2020 if (unlikely(dev_gso_segment(skb)))
1961 goto out_kfree_skb; 2021 goto out_kfree_skb;
@@ -2019,6 +2079,7 @@ out_kfree_gso_skb:
2019 skb->destructor = DEV_GSO_CB(skb)->destructor; 2079 skb->destructor = DEV_GSO_CB(skb)->destructor;
2020out_kfree_skb: 2080out_kfree_skb:
2021 kfree_skb(skb); 2081 kfree_skb(skb);
2082out:
2022 return rc; 2083 return rc;
2023} 2084}
2024 2085
@@ -2070,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2070 } else { 2131 } else {
2071 struct sock *sk = skb->sk; 2132 struct sock *sk = skb->sk;
2072 queue_index = sk_tx_queue_get(sk); 2133 queue_index = sk_tx_queue_get(sk);
2073 if (queue_index < 0) { 2134 if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
2074 2135
2075 queue_index = 0; 2136 queue_index = 0;
2076 if (dev->real_num_tx_queues > 1) 2137 if (dev->real_num_tx_queues > 1)
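dev_pick_tx() additionally distrusts a cached socket queue index now that netif_set_real_num_tx_queues() can shrink the active set at runtime: an out-of-range index is recomputed rather than used to transmit on a dead queue. A sketch of the fallback (the hash pick stands in for skb_tx_hash()):

#include <stdio.h>

/* Recompute the queue when the cached index is stale (queue count shrank). */
static unsigned int pick_tx(int cached, unsigned int real_num, unsigned int rxhash)
{
	if (cached >= 0 && (unsigned int)cached < real_num)
		return (unsigned int)cached;		/* cached index still valid */
	return real_num > 1 ? rxhash % real_num : 0;	/* stand-in for skb_tx_hash() */
}

int main(void)
{
	printf("%u\n", pick_tx(6, 4, 0xdeadbeef));	/* stale cache -> recomputed */
	return 0;
}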
@@ -2147,6 +2208,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2147 return rc; 2208 return rc;
2148} 2209}
2149 2210
2211static DEFINE_PER_CPU(int, xmit_recursion);
2212#define RECURSION_LIMIT 10
2213
2150/** 2214/**
2151 * dev_queue_xmit - transmit a buffer 2215 * dev_queue_xmit - transmit a buffer
2152 * @skb: buffer to transmit 2216 * @skb: buffer to transmit
@@ -2213,10 +2277,15 @@ int dev_queue_xmit(struct sk_buff *skb)
2213 2277
2214 if (txq->xmit_lock_owner != cpu) { 2278 if (txq->xmit_lock_owner != cpu) {
2215 2279
2280 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2281 goto recursion_alert;
2282
2216 HARD_TX_LOCK(dev, txq, cpu); 2283 HARD_TX_LOCK(dev, txq, cpu);
2217 2284
2218 if (!netif_tx_queue_stopped(txq)) { 2285 if (!netif_tx_queue_stopped(txq)) {
2286 __this_cpu_inc(xmit_recursion);
2219 rc = dev_hard_start_xmit(skb, dev, txq); 2287 rc = dev_hard_start_xmit(skb, dev, txq);
2288 __this_cpu_dec(xmit_recursion);
2220 if (dev_xmit_complete(rc)) { 2289 if (dev_xmit_complete(rc)) {
2221 HARD_TX_UNLOCK(dev, txq); 2290 HARD_TX_UNLOCK(dev, txq);
2222 goto out; 2291 goto out;
@@ -2228,7 +2297,9 @@ int dev_queue_xmit(struct sk_buff *skb)
2228 "queue packet!\n", dev->name); 2297 "queue packet!\n", dev->name);
2229 } else { 2298 } else {
2230 /* Recursion is detected! It is possible, 2299 /* Recursion is detected! It is possible,
2231 * unfortunately */ 2300 * unfortunately
2301 */
2302recursion_alert:
2232 if (net_ratelimit()) 2303 if (net_ratelimit())
2233 printk(KERN_CRIT "Dead loop on virtual device " 2304 printk(KERN_CRIT "Dead loop on virtual device "
2234 "%s, fix it urgently!\n", dev->name); 2305 "%s, fix it urgently!\n", dev->name);
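The per-CPU xmit_recursion counter turns dead-loop detection into an explicit bound: each nesting of dev_hard_start_xmit() on a CPU increments it, and exceeding RECURSION_LIMIT — as with stacked virtual devices transmitting into themselves — takes the same rate-limited "Dead loop" exit instead of overflowing the stack. A single-threaded sketch of the guard (the per-CPU variable becomes a plain static here):

#include <stdio.h>

#define RECURSION_LIMIT 10

static int xmit_recursion;	/* DEFINE_PER_CPU(int, ...) in the kernel */

static int xmit(int depth)
{
	if (xmit_recursion > RECURSION_LIMIT) {
		printf("Dead loop on virtual device, fix it urgently!\n");
		return -1;
	}
	xmit_recursion++;
	/* a looped virtual device would re-enter transmit here */
	int rc = depth ? xmit(depth - 1) : 0;
	xmit_recursion--;
	return rc;
}

int main(void)
{
	printf("rc=%d\n", xmit(100));	/* aborted by the recursion guard */
	return 0;
}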
@@ -2264,69 +2335,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2264 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 2335 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2265} 2336}
2266 2337
2267#ifdef CONFIG_RPS
2268
2269/* One global table that all flow-based protocols share. */
2270struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2271EXPORT_SYMBOL(rps_sock_flow_table);
2272
2273/* 2338/*
2274 * get_rps_cpu is called from netif_receive_skb and returns the target 2339 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2275 * CPU from the RPS map of the receiving queue for a given skb. 2340 * and src/dst port numbers. Returns a non-zero hash number on success
2276 * rcu_read_lock must be held on entry. 2341 * and 0 on failure.
2277 */ 2342 */
2278static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2343__u32 __skb_get_rxhash(struct sk_buff *skb)
2279 struct rps_dev_flow **rflowp)
2280{ 2344{
2345 int nhoff, hash = 0, poff;
2281 struct ipv6hdr *ip6; 2346 struct ipv6hdr *ip6;
2282 struct iphdr *ip; 2347 struct iphdr *ip;
2283 struct netdev_rx_queue *rxqueue;
2284 struct rps_map *map;
2285 struct rps_dev_flow_table *flow_table;
2286 struct rps_sock_flow_table *sock_flow_table;
2287 int cpu = -1;
2288 u8 ip_proto; 2348 u8 ip_proto;
2289 u16 tcpu;
2290 u32 addr1, addr2, ihl; 2349 u32 addr1, addr2, ihl;
2291 union { 2350 union {
2292 u32 v32; 2351 u32 v32;
2293 u16 v16[2]; 2352 u16 v16[2];
2294 } ports; 2353 } ports;
2295 2354
2296 if (skb_rx_queue_recorded(skb)) { 2355 nhoff = skb_network_offset(skb);
2297 u16 index = skb_get_rx_queue(skb);
2298 if (unlikely(index >= dev->num_rx_queues)) {
2299 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2300 "on queue %u, but number of RX queues is %u\n",
2301 dev->name, index, dev->num_rx_queues);
2302 goto done;
2303 }
2304 rxqueue = dev->_rx + index;
2305 } else
2306 rxqueue = dev->_rx;
2307
2308 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2309 goto done;
2310
2311 if (skb->rxhash)
2312 goto got_hash; /* Skip hash computation on packet header */
2313 2356
2314 switch (skb->protocol) { 2357 switch (skb->protocol) {
2315 case __constant_htons(ETH_P_IP): 2358 case __constant_htons(ETH_P_IP):
2316 if (!pskb_may_pull(skb, sizeof(*ip))) 2359 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2317 goto done; 2360 goto done;
2318 2361
2319 ip = (struct iphdr *) skb->data; 2362 ip = (struct iphdr *) (skb->data + nhoff);
2320 ip_proto = ip->protocol; 2363 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
2364 ip_proto = 0;
2365 else
2366 ip_proto = ip->protocol;
2321 addr1 = (__force u32) ip->saddr; 2367 addr1 = (__force u32) ip->saddr;
2322 addr2 = (__force u32) ip->daddr; 2368 addr2 = (__force u32) ip->daddr;
2323 ihl = ip->ihl; 2369 ihl = ip->ihl;
2324 break; 2370 break;
2325 case __constant_htons(ETH_P_IPV6): 2371 case __constant_htons(ETH_P_IPV6):
2326 if (!pskb_may_pull(skb, sizeof(*ip6))) 2372 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2327 goto done; 2373 goto done;
2328 2374
2329 ip6 = (struct ipv6hdr *) skb->data; 2375 ip6 = (struct ipv6hdr *) (skb->data + nhoff);
2330 ip_proto = ip6->nexthdr; 2376 ip_proto = ip6->nexthdr;
2331 addr1 = (__force u32) ip6->saddr.s6_addr32[3]; 2377 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2332 addr2 = (__force u32) ip6->daddr.s6_addr32[3]; 2378 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2335,33 +2381,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2335 default: 2381 default:
2336 goto done; 2382 goto done;
2337 } 2383 }
2338 switch (ip_proto) { 2384
2339 case IPPROTO_TCP: 2385 ports.v32 = 0;
2340 case IPPROTO_UDP: 2386 poff = proto_ports_offset(ip_proto);
2341 case IPPROTO_DCCP: 2387 if (poff >= 0) {
2342 case IPPROTO_ESP: 2388 nhoff += ihl * 4 + poff;
2343 case IPPROTO_AH: 2389 if (pskb_may_pull(skb, nhoff + 4)) {
2344 case IPPROTO_SCTP: 2390 ports.v32 = * (__force u32 *) (skb->data + nhoff);
2345 case IPPROTO_UDPLITE:
2346 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2347 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2348 if (ports.v16[1] < ports.v16[0]) 2391 if (ports.v16[1] < ports.v16[0])
2349 swap(ports.v16[0], ports.v16[1]); 2392 swap(ports.v16[0], ports.v16[1]);
2350 break;
2351 } 2393 }
2352 default:
2353 ports.v32 = 0;
2354 break;
2355 } 2394 }
2356 2395
2357 /* get a consistent hash (same value on both flow directions) */ 2396 /* get a consistent hash (same value on both flow directions) */
2358 if (addr2 < addr1) 2397 if (addr2 < addr1)
2359 swap(addr1, addr2); 2398 swap(addr1, addr2);
2360 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2361 if (!skb->rxhash)
2362 skb->rxhash = 1;
2363 2399
2364got_hash: 2400 hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2401 if (!hash)
2402 hash = 1;
2403
2404done:
2405 return hash;
2406}
2407EXPORT_SYMBOL(__skb_get_rxhash);
2408
2409#ifdef CONFIG_RPS
2410
2411/* One global table that all flow-based protocols share. */
2412struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2413EXPORT_SYMBOL(rps_sock_flow_table);
2414
2415/*
2416 * get_rps_cpu is called from netif_receive_skb and returns the target
2417 * CPU from the RPS map of the receiving queue for a given skb.
2418 * rcu_read_lock must be held on entry.
2419 */
2420static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2421 struct rps_dev_flow **rflowp)
2422{
2423 struct netdev_rx_queue *rxqueue;
2424 struct rps_map *map;
2425 struct rps_dev_flow_table *flow_table;
2426 struct rps_sock_flow_table *sock_flow_table;
2427 int cpu = -1;
2428 u16 tcpu;
2429
2430 if (skb_rx_queue_recorded(skb)) {
2431 u16 index = skb_get_rx_queue(skb);
2432 if (unlikely(index >= dev->real_num_rx_queues)) {
2433 WARN_ONCE(dev->real_num_rx_queues > 1,
2434 "%s received packet on queue %u, but number "
2435 "of RX queues is %u\n",
2436 dev->name, index, dev->real_num_rx_queues);
2437 goto done;
2438 }
2439 rxqueue = dev->_rx + index;
2440 } else
2441 rxqueue = dev->_rx;
2442
2443 map = rcu_dereference(rxqueue->rps_map);
2444 if (map) {
2445 if (map->len == 1) {
2446 tcpu = map->cpus[0];
2447 if (cpu_online(tcpu))
2448 cpu = tcpu;
2449 goto done;
2450 }
2451 } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
2452 goto done;
2453 }
2454
2455 skb_reset_network_header(skb);
2456 if (!skb_get_rxhash(skb))
2457 goto done;
2458
2365 flow_table = rcu_dereference(rxqueue->rps_flow_table); 2459 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2366 sock_flow_table = rcu_dereference(rps_sock_flow_table); 2460 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2367 if (flow_table && sock_flow_table) { 2461 if (flow_table && sock_flow_table) {
@@ -2401,7 +2495,6 @@ got_hash:
2401 } 2495 }
2402 } 2496 }
2403 2497
2404 map = rcu_dereference(rxqueue->rps_map);
2405 if (map) { 2498 if (map) {
2406 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; 2499 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2407 2500
@@ -2487,6 +2580,7 @@ enqueue:
2487 2580
2488 local_irq_restore(flags); 2581 local_irq_restore(flags);
2489 2582
2583 atomic_long_inc(&skb->dev->rx_dropped);
2490 kfree_skb(skb); 2584 kfree_skb(skb);
2491 return NET_RX_DROP; 2585 return NET_RX_DROP;
2492} 2586}
@@ -2643,11 +2737,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2643 * the ingress scheduler, you just can't add policies on ingress. 2737 * the ingress scheduler, you just can't add policies on ingress.
2644 * 2738 *
2645 */ 2739 */
2646static int ing_filter(struct sk_buff *skb) 2740static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
2647{ 2741{
2648 struct net_device *dev = skb->dev; 2742 struct net_device *dev = skb->dev;
2649 u32 ttl = G_TC_RTTL(skb->tc_verd); 2743 u32 ttl = G_TC_RTTL(skb->tc_verd);
2650 struct netdev_queue *rxq;
2651 int result = TC_ACT_OK; 2744 int result = TC_ACT_OK;
2652 struct Qdisc *q; 2745 struct Qdisc *q;
2653 2746
@@ -2661,8 +2754,6 @@ static int ing_filter(struct sk_buff *skb)
2661 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 2754 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2662 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 2755 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2663 2756
2664 rxq = &dev->rx_queue;
2665
2666 q = rxq->qdisc; 2757 q = rxq->qdisc;
2667 if (q != &noop_qdisc) { 2758 if (q != &noop_qdisc) {
2668 spin_lock(qdisc_lock(q)); 2759 spin_lock(qdisc_lock(q));
@@ -2678,7 +2769,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2678 struct packet_type **pt_prev, 2769 struct packet_type **pt_prev,
2679 int *ret, struct net_device *orig_dev) 2770 int *ret, struct net_device *orig_dev)
2680{ 2771{
2681 if (skb->dev->rx_queue.qdisc == &noop_qdisc) 2772 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
2773
2774 if (!rxq || rxq->qdisc == &noop_qdisc)
2682 goto out; 2775 goto out;
2683 2776
2684 if (*pt_prev) { 2777 if (*pt_prev) {
@@ -2686,7 +2779,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2686 *pt_prev = NULL; 2779 *pt_prev = NULL;
2687 } 2780 }
2688 2781
2689 switch (ing_filter(skb)) { 2782 switch (ing_filter(skb, rxq)) {
2690 case TC_ACT_SHOT: 2783 case TC_ACT_SHOT:
2691 case TC_ACT_STOLEN: 2784 case TC_ACT_STOLEN:
2692 kfree_skb(skb); 2785 kfree_skb(skb);
@@ -2699,33 +2792,6 @@ out:
2699} 2792}
2700#endif 2793#endif
2701 2794
2702/*
2703 * netif_nit_deliver - deliver received packets to network taps
2704 * @skb: buffer
2705 *
2706 * This function is used to deliver incoming packets to network
2707 * taps. It should be used when the normal netif_receive_skb path
2708 * is bypassed, for example because of VLAN acceleration.
2709 */
2710void netif_nit_deliver(struct sk_buff *skb)
2711{
2712 struct packet_type *ptype;
2713
2714 if (list_empty(&ptype_all))
2715 return;
2716
2717 skb_reset_network_header(skb);
2718 skb_reset_transport_header(skb);
2719 skb->mac_len = skb->network_header - skb->mac_header;
2720
2721 rcu_read_lock();
2722 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2723 if (!ptype->dev || ptype->dev == skb->dev)
2724 deliver_skb(skb, ptype, skb->dev);
2725 }
2726 rcu_read_unlock();
2727}
2728
2729/** 2795/**
2730 * netdev_rx_handler_register - register receive handler 2796 * netdev_rx_handler_register - register receive handler
2731 * @dev: device to register a handler for 2797 * @dev: device to register a handler for
@@ -2836,8 +2902,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
2836 net_timestamp_check(skb); 2902 net_timestamp_check(skb);
2837 2903
2838 trace_netif_receive_skb(skb); 2904 trace_netif_receive_skb(skb);
2839 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2840 return NET_RX_SUCCESS;
2841 2905
2842 /* if we've gotten here through NAPI, check netpoll */ 2906 /* if we've gotten here through NAPI, check netpoll */
2843 if (netpoll_receive_skb(skb)) 2907 if (netpoll_receive_skb(skb))
@@ -2851,8 +2915,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
2851 * be delivered to pkt handlers that are exact matches. Also 2915 * be delivered to pkt handlers that are exact matches. Also
2852 * the deliver_no_wcard flag will be set. If packet handlers 2916 * the deliver_no_wcard flag will be set. If packet handlers
2853 * are sensitive to duplicate packets these skbs will need to 2917 * are sensitive to duplicate packets these skbs will need to
2854 * be dropped at the handler. The vlan accel path may have 2918 * be dropped at the handler.
2855 * already set the deliver_no_wcard flag.
2856 */ 2919 */
2857 null_or_orig = NULL; 2920 null_or_orig = NULL;
2858 orig_dev = skb->dev; 2921 orig_dev = skb->dev;
@@ -2911,6 +2974,18 @@ ncls:
2911 goto out; 2974 goto out;
2912 } 2975 }
2913 2976
2977 if (vlan_tx_tag_present(skb)) {
2978 if (pt_prev) {
2979 ret = deliver_skb(skb, pt_prev, orig_dev);
2980 pt_prev = NULL;
2981 }
2982 if (vlan_hwaccel_do_receive(&skb)) {
2983 ret = __netif_receive_skb(skb);
2984 goto out;
2985 } else if (unlikely(!skb))
2986 goto out;
2987 }
2988
2914 /* 2989 /*
2915 * Make sure frames received on VLAN interfaces stacked on 2990 * Make sure frames received on VLAN interfaces stacked on
2916 * bonding interfaces still make their way to any base bonding 2991 * bonding interfaces still make their way to any base bonding
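With netif_nit_deliver() removed above, hardware-accelerated VLAN frames travel the ordinary __netif_receive_skb() path: taps see the frame first via deliver_skb(), then vlan_hwaccel_do_receive() either retargets skb->dev at the VLAN device (and the frame is re-dispatched through the same function) or, with no VLAN device configured, may consume the skb, in which case the function bails out. A rough control-flow model of that branch (booleans stand in for the skb state):

#include <stdbool.h>
#include <stdio.h>

/* Rough model of the VLAN branch in the patched __netif_receive_skb(). */
static int receive(bool vlan_tagged, bool vlan_dev_exists)
{
	if (vlan_tagged) {
		printf("deliver to taps first\n");		/* deliver_skb(..., pt_prev, ...) */
		if (vlan_dev_exists) {
			printf("retarget skb->dev, re-dispatch\n");
			return receive(false, false);		/* vlan_hwaccel_do_receive() == true */
		}
		printf("no vlan device: skb consumed\n");	/* the unlikely(!skb) exit */
		return 1;					/* dropped */
	}
	printf("normal protocol delivery\n");
	return 0;						/* NET_RX_SUCCESS */
}

int main(void)
{
	return receive(true, true);
}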
@@ -2938,6 +3013,7 @@ ncls:
2938 if (pt_prev) { 3013 if (pt_prev) {
2939 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 3014 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2940 } else { 3015 } else {
3016 atomic_long_inc(&skb->dev->rx_dropped);
2941 kfree_skb(skb); 3017 kfree_skb(skb);
2942 /* Jamal, now you will not be able to escape explaining 3018 /* Jamal, now you will not be able to escape explaining
2943 * me how you were going to use this. :-) 3019 * me how you were going to use this. :-)
@@ -3058,7 +3134,7 @@ out:
3058 return netif_receive_skb(skb); 3134 return netif_receive_skb(skb);
3059} 3135}
3060 3136
3061static void napi_gro_flush(struct napi_struct *napi) 3137inline void napi_gro_flush(struct napi_struct *napi)
3062{ 3138{
3063 struct sk_buff *skb, *next; 3139 struct sk_buff *skb, *next;
3064 3140
@@ -3071,6 +3147,7 @@ static void napi_gro_flush(struct napi_struct *napi)
3071 napi->gro_count = 0; 3147 napi->gro_count = 0;
3072 napi->gro_list = NULL; 3148 napi->gro_list = NULL;
3073} 3149}
3150EXPORT_SYMBOL(napi_gro_flush);
3074 3151
3075enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3152enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3076{ 3153{
@@ -3085,7 +3162,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3085 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) 3162 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3086 goto normal; 3163 goto normal;
3087 3164
3088 if (skb_is_gso(skb) || skb_has_frags(skb)) 3165 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3089 goto normal; 3166 goto normal;
3090 3167
3091 rcu_read_lock(); 3168 rcu_read_lock();
@@ -3164,16 +3241,19 @@ normal:
3164} 3241}
3165EXPORT_SYMBOL(dev_gro_receive); 3242EXPORT_SYMBOL(dev_gro_receive);
3166 3243
3167static gro_result_t 3244static inline gro_result_t
3168__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 3245__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3169{ 3246{
3170 struct sk_buff *p; 3247 struct sk_buff *p;
3171 3248
3172 for (p = napi->gro_list; p; p = p->next) { 3249 for (p = napi->gro_list; p; p = p->next) {
3173 NAPI_GRO_CB(p)->same_flow = 3250 unsigned long diffs;
3174 (p->dev == skb->dev) && 3251
3175 !compare_ether_header(skb_mac_header(p), 3252 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3253 diffs |= p->vlan_tci ^ skb->vlan_tci;
3254 diffs |= compare_ether_header(skb_mac_header(p),
3176 skb_gro_mac_header(skb)); 3255 skb_gro_mac_header(skb));
3256 NAPI_GRO_CB(p)->same_flow = !diffs;
3177 NAPI_GRO_CB(p)->flush = 0; 3257 NAPI_GRO_CB(p)->flush = 0;
3178 } 3258 }
3179 3259
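The GRO flow-match loop becomes branch-free: it ORs together the differences — device pointer XOR, VLAN tag XOR, and the Ethernet header compare, which returns nonzero on mismatch — and declares the flows identical exactly when the accumulator is zero. Folding vlan_tci in also stops frames from different VLANs being merged. A sketch of the accumulate-diffs idiom (memcmp normalised to 0/1 stands in for compare_ether_header()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct pkt {
	const void *dev;	/* device identity, compared by pointer */
	uint16_t vlan_tci;
	uint8_t mac[12];	/* dst + src MAC, as compare_ether_header() sees them */
};

static int same_flow(const struct pkt *p, const struct pkt *q)
{
	unsigned long diffs;

	diffs  = (unsigned long)p->dev ^ (unsigned long)q->dev;
	diffs |= p->vlan_tci ^ q->vlan_tci;
	diffs |= memcmp(p->mac, q->mac, sizeof(p->mac)) != 0;	/* stand-in compare */
	return !diffs;
}

int main(void)
{
	struct pkt a = { .dev = (void *)0x1, .vlan_tci = 5, .mac = "ab" };
	struct pkt b = a;
	printf("%d\n", same_flow(&a, &b));	/* 1: identical flow keys */
	return 0;
}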
@@ -3226,14 +3306,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3226} 3306}
3227EXPORT_SYMBOL(napi_gro_receive); 3307EXPORT_SYMBOL(napi_gro_receive);
3228 3308
3229void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 3309static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3230{ 3310{
3231 __skb_pull(skb, skb_headlen(skb)); 3311 __skb_pull(skb, skb_headlen(skb));
3232 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); 3312 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3313 skb->vlan_tci = 0;
3233 3314
3234 napi->skb = skb; 3315 napi->skb = skb;
3235} 3316}
3236EXPORT_SYMBOL(napi_reuse_skb);
3237 3317
3238struct sk_buff *napi_get_frags(struct napi_struct *napi) 3318struct sk_buff *napi_get_frags(struct napi_struct *napi)
3239{ 3319{
@@ -4867,21 +4947,6 @@ static void rollback_registered(struct net_device *dev)
4867 rollback_registered_many(&single); 4947 rollback_registered_many(&single);
4868} 4948}
4869 4949
4870static void __netdev_init_queue_locks_one(struct net_device *dev,
4871 struct netdev_queue *dev_queue,
4872 void *_unused)
4873{
4874 spin_lock_init(&dev_queue->_xmit_lock);
4875 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4876 dev_queue->xmit_lock_owner = -1;
4877}
4878
4879static void netdev_init_queue_locks(struct net_device *dev)
4880{
4881 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4882 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4883}
4884
4885unsigned long netdev_fix_features(unsigned long features, const char *name) 4950unsigned long netdev_fix_features(unsigned long features, const char *name)
4886{ 4951{
4887 /* Fix illegal SG+CSUM combinations. */ 4952 /* Fix illegal SG+CSUM combinations. */
@@ -4949,6 +5014,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
+static int netif_alloc_rx_queues(struct net_device *dev)
+{
+#ifdef CONFIG_RPS
+	unsigned int i, count = dev->num_rx_queues;
+	struct netdev_rx_queue *rx;
+
+	BUG_ON(count < 1);
+
+	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+	if (!rx) {
+		pr_err("netdev: Unable to allocate %u rx queues.\n", count);
+		return -ENOMEM;
+	}
+	dev->_rx = rx;
+
+	/*
+	 * Set a pointer to first element in the array which holds the
+	 * reference count.
+	 */
+	for (i = 0; i < count; i++)
+		rx[i].first = rx;
+#endif
+	return 0;
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+	unsigned int count = dev->num_tx_queues;
+	struct netdev_queue *tx;
+
+	BUG_ON(count < 1);
+
+	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+	if (!tx) {
+		pr_err("netdev: Unable to allocate %u tx queues.\n",
+		       count);
+		return -ENOMEM;
+	}
+	dev->_tx = tx;
+	return 0;
+}
+
+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue,
+				  void *_unused)
+{
+	queue->dev = dev;
+
+	/* Initialize queue lock */
+	spin_lock_init(&queue->_xmit_lock);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+	spin_lock_init(&dev->tx_global_lock);
+}
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
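
netif_alloc_rx_queues points each element's first field back at slot 0 of the kcalloc'd array, so any queue can reach the element that carries the shared reference count and the whole block can be freed once. A userspace model of that backpointer layout (struct rxq is a hypothetical stand-in):

#include <stdio.h>
#include <stdlib.h>

struct rxq {
	struct rxq *first;	/* points at element 0, which owns the block */
	int count;		/* meaningful only in element 0 */
};

static struct rxq *alloc_rx_queues(unsigned int n)
{
	struct rxq *rx = calloc(n, sizeof(*rx));
	unsigned int i;

	if (!rx)
		return NULL;
	rx[0].count = n;
	for (i = 0; i < n; i++)
		rx[i].first = rx;	/* any element can find the owner */
	return rx;
}

int main(void)
{
	struct rxq *rx = alloc_rx_queues(4);

	if (!rx)
		return 1;
	/* given any queue, reach and free the whole array through ->first */
	printf("queues: %d\n", rx[2].first->count);
	free(rx[2].first);
	return 0;
}
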
@@ -4982,28 +5107,19 @@ int register_netdevice(struct net_device *dev)
 
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
-	netdev_init_queue_locks(dev);
 
 	dev->iflink = -1;
 
-#ifdef CONFIG_RPS
-	if (!dev->num_rx_queues) {
-		/*
-		 * Allocate a single RX queue if driver never called
-		 * alloc_netdev_mq
-		 */
+	ret = netif_alloc_rx_queues(dev);
+	if (ret)
+		goto out;
 
-		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
-		if (!dev->_rx) {
-			ret = -ENOMEM;
-			goto out;
-		}
+	ret = netif_alloc_netdev_queues(dev);
+	if (ret)
+		goto out;
+
+	netdev_init_queues(dev);
 
-		dev->_rx->first = dev->_rx;
-		atomic_set(&dev->_rx->count, 1);
-		dev->num_rx_queues = 1;
-	}
-#endif
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -5043,6 +5159,12 @@ int register_netdevice(struct net_device *dev)
 	if (dev->features & NETIF_F_SG)
 		dev->features |= NETIF_F_GSO;
 
+	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
+	 * vlan_dev_init() will do the dev->features check, so these features
+	 * are enabled only if supported by underlying device.
+	 */
+	dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+
 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -5113,9 +5235,6 @@ int init_dummy_netdev(struct net_device *dev)
 	 */
 	dev->reg_state = NETREG_DUMMY;
 
-	/* initialize the ref count */
-	atomic_set(&dev->refcnt, 1);
-
 	/* NAPI wants this */
 	INIT_LIST_HEAD(&dev->napi_list);
 
@@ -5123,6 +5242,11 @@ int init_dummy_netdev(struct net_device *dev)
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 	set_bit(__LINK_STATE_START, &dev->state);
 
+	/* Note : We dont allocate pcpu_refcnt for dummy devices,
+	 * because users of this 'device' dont need to change
+	 * its refcount.
+	 */
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5164,6 +5288,16 @@ out:
 }
 EXPORT_SYMBOL(register_netdev);
 
+int netdev_refcnt_read(const struct net_device *dev)
+{
+	int i, refcnt = 0;
+
+	for_each_possible_cpu(i)
+		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+	return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
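
netdev_refcnt_read illustrates the per-CPU refcount pattern this series introduces: hot paths touch only their own CPU's slot, and only the rare teardown path pays for a full scan. Individual slots may go negative when a reference taken on one CPU is dropped on another; only the sum is meaningful. A self-contained model using an array in place of per-CPU storage:

#include <stdio.h>

#define NR_CPUS 4

static int pcpu_refcnt[NR_CPUS];	/* one slot per CPU, no shared cacheline */

static void ref_get(int cpu) { pcpu_refcnt[cpu]++; }
static void ref_put(int cpu) { pcpu_refcnt[cpu]--; }

/* Slow path: the true count is only known by summing all slots. */
static int refcnt_read(void)
{
	int cpu, refcnt = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		refcnt += pcpu_refcnt[cpu];
	return refcnt;
}

int main(void)
{
	ref_get(0);		/* taken on CPU 0 ... */
	ref_get(1);
	ref_put(3);		/* ... may be released on another CPU */
	printf("refcnt = %d\n", refcnt_read());	/* prints 1 */
	return 0;
}
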
@@ -5178,11 +5312,14 @@ EXPORT_SYMBOL(register_netdev);
 static void netdev_wait_allrefs(struct net_device *dev)
 {
 	unsigned long rebroadcast_time, warning_time;
+	int refcnt;
 
 	linkwatch_forget_dev(dev);
 
 	rebroadcast_time = warning_time = jiffies;
-	while (atomic_read(&dev->refcnt) != 0) {
+	refcnt = netdev_refcnt_read(dev);
+
+	while (refcnt != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
 			rtnl_lock();
 
@@ -5209,11 +5346,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		msleep(250);
 
+		refcnt = netdev_refcnt_read(dev);
+
 		if (time_after(jiffies, warning_time + 10 * HZ)) {
 			printk(KERN_EMERG "unregister_netdevice: "
 			       "waiting for %s to become free. Usage "
 			       "count = %d\n",
-			       dev->name, atomic_read(&dev->refcnt));
+			       dev->name, refcnt);
 			warning_time = jiffies;
 		}
 	}
@@ -5271,9 +5410,9 @@ void netdev_run_todo(void)
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-		BUG_ON(atomic_read(&dev->refcnt));
-		WARN_ON(dev->ip_ptr);
-		WARN_ON(dev->ip6_ptr);
+		BUG_ON(netdev_refcnt_read(dev));
+		WARN_ON(rcu_dereference_raw(dev->ip_ptr));
+		WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
 		if (dev->destructor)
@@ -5350,30 +5489,34 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 
 	if (ops->ndo_get_stats64) {
 		memset(storage, 0, sizeof(*storage));
-		return ops->ndo_get_stats64(dev, storage);
-	}
-	if (ops->ndo_get_stats) {
+		ops->ndo_get_stats64(dev, storage);
+	} else if (ops->ndo_get_stats) {
 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
-		return storage;
+	} else {
+		netdev_stats_to_stats64(storage, &dev->stats);
+		dev_txq_stats_fold(dev, storage);
 	}
-	netdev_stats_to_stats64(storage, &dev->stats);
-	dev_txq_stats_fold(dev, storage);
+	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue,
-				  void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 {
-	queue->dev = dev;
-}
+	struct netdev_queue *queue = dev_ingress_queue(dev);
 
-static void netdev_init_queues(struct net_device *dev)
-{
-	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
-	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
-	spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+	if (queue)
+		return queue;
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	netdev_init_one_queue(dev, queue, NULL);
+	queue->qdisc = &noop_qdisc;
+	queue->qdisc_sleeping = &noop_qdisc;
+	rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+	return queue;
 }
 
 /**
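
The restructured dev_get_stats is now a strict fallback chain with a single exit, so the core-maintained rx_dropped counter can be folded in no matter which source supplied the stats. A userspace sketch of that shape (struct dev and its callbacks are simplified stand-ins):

#include <stdio.h>

struct stats { long rx_packets; long rx_dropped; };

struct dev {
	struct stats stats;			/* device-maintained fallback */
	long rx_dropped;			/* kept by the core, not the driver */
	void (*ndo_get_stats64)(struct dev *, struct stats *);
	struct stats *(*ndo_get_stats)(struct dev *);
};

static struct stats *dev_get_stats(struct dev *dev, struct stats *storage)
{
	if (dev->ndo_get_stats64) {
		dev->ndo_get_stats64(dev, storage);	/* best: native 64-bit stats */
	} else if (dev->ndo_get_stats) {
		*storage = *dev->ndo_get_stats(dev);	/* legacy driver counters */
	} else {
		*storage = dev->stats;			/* core-maintained counters */
	}
	/* folded in unconditionally, whichever branch ran */
	storage->rx_dropped += dev->rx_dropped;
	return storage;
}

int main(void)
{
	struct dev d = { .stats = { .rx_packets = 7 }, .rx_dropped = 3 };
	struct stats s;

	dev_get_stats(&d, &s);
	printf("rx %ld dropped %ld\n", s.rx_packets, s.rx_dropped);
	return 0;
}
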
@@ -5390,17 +5533,18 @@ static void netdev_init_queues(struct net_device *dev)
 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
-	struct netdev_queue *tx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
-#ifdef CONFIG_RPS
-	struct netdev_rx_queue *rx;
-	int i;
-#endif
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
+	if (queue_count < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero queues.\n");
+		return NULL;
+	}
+
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
@@ -5416,55 +5560,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
-	if (!tx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "tx qdiscs.\n");
-		goto free_p;
-	}
-
-#ifdef CONFIG_RPS
-	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-	if (!rx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "rx queues.\n");
-		goto free_tx;
-	}
-
-	atomic_set(&rx->count, queue_count);
-
-	/*
-	 * Set a pointer to first element in the array which holds the
-	 * reference count.
-	 */
-	for (i = 0; i < queue_count; i++)
-		rx[i].first = rx;
-#endif
-
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+	dev->pcpu_refcnt = alloc_percpu(int);
+	if (!dev->pcpu_refcnt)
+		goto free_p;
+
 	if (dev_addr_init(dev))
-		goto free_rx;
+		goto free_pcpu;
 
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
 
-	dev->_tx = tx;
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
 #ifdef CONFIG_RPS
-	dev->_rx = rx;
 	dev->num_rx_queues = queue_count;
+	dev->real_num_rx_queues = queue_count;
 #endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
-	netdev_init_queues(dev);
-
 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
 	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
@@ -5475,12 +5595,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	strcpy(dev->name, name);
 	return dev;
 
-free_rx:
-#ifdef CONFIG_RPS
-	kfree(rx);
-free_tx:
-#endif
-	kfree(tx);
+free_pcpu:
+	free_percpu(dev->pcpu_refcnt);
 free_p:
 	kfree(p);
 	return NULL;
@@ -5503,6 +5619,8 @@ void free_netdev(struct net_device *dev)
 
 	kfree(dev->_tx);
 
+	kfree(rcu_dereference_raw(dev->ingress_queue));
+
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
@@ -5512,6 +5630,9 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	free_percpu(dev->pcpu_refcnt);
+	dev->pcpu_refcnt = NULL;
+
 	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
@@ -5666,6 +5787,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* Notify protocols, that we are about to destroy
 	   this device. They should clean all the things.
+
+	   Note that dev->reg_state stays at NETREG_REGISTERED.
+	   This is wanted because this way 8021q and macvlan know
+	   the device is just moving and can keep their slaves up.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
diff --git a/net/core/dst.c b/net/core/dst.c
index 6c41b1fac3db..b99c7c7ffce2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops)
 {
 	struct dst_entry *dst;
 
-	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
 		if (ops->gc(ops))
 			return NULL;
 	}
@@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops)
 #if RT_CACHE_DEBUG >= 2
 	atomic_inc(&dst_total);
 #endif
-	atomic_inc(&ops->entries);
+	dst_entries_add(ops, 1);
 	return dst;
 }
 EXPORT_SYMBOL(dst_alloc);
@@ -228,15 +228,15 @@ again:
 	child = dst->child;
 
 	dst->hh = NULL;
-	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
-		kfree(hh);
+	if (hh)
+		hh_cache_put(hh);
 
 	if (neigh) {
 		dst->neighbour = NULL;
 		neigh_release(neigh);
 	}
 
-	atomic_dec(&dst->ops->entries);
+	dst_entries_add(dst->ops, -1);
 
 	if (dst->ops->destroy)
 		dst->ops->destroy(dst);
@@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst)
 	if (dst) {
 		int newrefcnt;
 
-		smp_mb__before_atomic_dec();
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
 		WARN_ON(newrefcnt < 0);
+		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
+			dst = dst_destroy(dst);
+			if (dst)
+				__dst_free(dst);
+		}
 	}
 }
 EXPORT_SYMBOL(dst_release);
 
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	/* If dst not in cache, we must take a reference, because
+	 * dst_release() will destroy dst as soon as its refcount becomes zero
+	 */
+	if (unlikely(dst->flags & DST_NOCACHE)) {
+		dst_hold(dst);
+		skb_dst_set(skb, dst);
+	} else {
+		skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+	}
+}
+EXPORT_SYMBOL(skb_dst_set_noref);
+
 /* Dirty hack. We did it in 2.2 (in __dst_free),
  * we have _very_ good reasons not to repeat
  * this mistake in 2.3, but we have no choice
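
skb_dst_set_noref relies on a tagged pointer: the dst address is stored with a low bit (SKB_DST_NOREF) set, so the drop path knows no reference was taken. A userspace sketch of the convention, with slot standing in for skb->_skb_refdst (struct alignment guarantees the low bit is free):

#include <stdio.h>
#include <stdint.h>

#define DST_NOREF	1UL		/* low bit marks "no reference held" */

struct dst { int refcnt; };

static uintptr_t slot;			/* models skb->_skb_refdst */

static void set_ref(struct dst *d)   { d->refcnt++; slot = (uintptr_t)d; }
static void set_noref(struct dst *d) { slot = (uintptr_t)d | DST_NOREF; }

static void drop(void)
{
	struct dst *d = (struct dst *)(slot & ~DST_NOREF);

	if (d && !(slot & DST_NOREF))
		d->refcnt--;		/* only counted references are released */
	slot = 0;
}

int main(void)
{
	struct dst d = { .refcnt = 1 };

	set_noref(&d);
	drop();				/* refcnt untouched */
	set_ref(&d);
	drop();				/* refcnt back to 1 */
	printf("refcnt = %d\n", d.refcnt);
	return 0;
}
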
@@ -343,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 
 static struct notifier_block dst_dev_notifier = {
 	.notifier_call = dst_dev_event,
+	.priority = -10, /* must be called after other network notifiers */
 };
 
 void __init dst_init(void)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8451ab481095..956a9f4971cb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -19,6 +19,7 @@
 #include <linux/netdevice.h>
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 #include <linux/slab.h>
 
 /*
@@ -131,7 +132,8 @@ EXPORT_SYMBOL(ethtool_op_set_ufo);
  * NETIF_F_xxx values in include/linux/netdevice.h
  */
 static const u32 flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
+	 ETH_FLAG_RXHASH);
 
 u32 ethtool_op_get_flags(struct net_device *dev)
 {
@@ -205,18 +207,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 	struct ethtool_drvinfo info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 
-	if (!ops->get_drvinfo)
-		return -EOPNOTSUPP;
-
 	memset(&info, 0, sizeof(info));
 	info.cmd = ETHTOOL_GDRVINFO;
-	ops->get_drvinfo(dev, &info);
+	if (ops && ops->get_drvinfo) {
+		ops->get_drvinfo(dev, &info);
+	} else if (dev->dev.parent && dev->dev.parent->driver) {
+		strlcpy(info.bus_info, dev_name(dev->dev.parent),
+			sizeof(info.bus_info));
+		strlcpy(info.driver, dev->dev.parent->driver->name,
+			sizeof(info.driver));
+	} else {
+		return -EOPNOTSUPP;
+	}
 
 	/*
 	 * this method of obtaining string set info is deprecated;
 	 * Use ETHTOOL_GSSET_INFO instead.
 	 */
-	if (ops->get_sset_count) {
+	if (ops && ops->get_sset_count) {
 		int rc;
 
 		rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -229,9 +237,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 		if (rc >= 0)
 			info.n_priv_flags = rc;
 	}
-	if (ops->get_regs_len)
+	if (ops && ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
-	if (ops->get_eeprom_len)
+	if (ops && ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
 
 	if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -479,6 +487,38 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
 	list->count++;
 }
 
+/*
+ * ethtool does not (or did not) set masks for flow parameters that are
+ * not specified, so if both value and mask are 0 then this must be
+ * treated as equivalent to a mask with all bits set.  Implement that
+ * here rather than in drivers.
+ */
+static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
+{
+	struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
+	struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
+
+	if (fs->flow_type != TCP_V4_FLOW &&
+	    fs->flow_type != UDP_V4_FLOW &&
+	    fs->flow_type != SCTP_V4_FLOW)
+		return;
+
+	if (!(entry->ip4src | mask->ip4src))
+		mask->ip4src = htonl(0xffffffff);
+	if (!(entry->ip4dst | mask->ip4dst))
+		mask->ip4dst = htonl(0xffffffff);
+	if (!(entry->psrc | mask->psrc))
+		mask->psrc = htons(0xffff);
+	if (!(entry->pdst | mask->pdst))
+		mask->pdst = htons(0xffff);
+	if (!(entry->tos | mask->tos))
+		mask->tos = 0xff;
+	if (!(fs->vlan_tag | fs->vlan_tag_mask))
+		fs->vlan_tag_mask = 0xffff;
+	if (!(fs->data | fs->data_mask))
+		fs->data_mask = 0xffffffffffffffffULL;
+}
+
 static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
 						    void __user *useraddr)
 {
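
The value-or-mask test in rx_ntuple_fix_masks distinguishes a field the user never touched (both zero) from one given explicitly, and rewrites only the former to the all-ones "ignore" mask. A reduced model for a single 16-bit field:

#include <stdio.h>
#include <stdint.h>

struct field { uint16_t value, mask; };

/* Old ethtool binaries leave unspecified fields as value 0 / mask 0.
 * Canonicalize that to mask all-ones ("ignore the field") here, so
 * every driver sees the same encoding. */
static void fix_mask(struct field *f)
{
	if (!(f->value | f->mask))
		f->mask = 0xffff;
}

int main(void)
{
	struct field unspecified = { 0, 0 };
	struct field port_80 = { 80, 0 };	/* explicitly given, left alone */

	fix_mask(&unspecified);
	fix_mask(&port_80);
	printf("unspecified mask 0x%04x, port-80 mask 0x%04x\n",
	       unspecified.mask, port_80.mask);
	return 0;
}
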
@@ -493,6 +533,8 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
 
+	rx_ntuple_fix_masks(&cmd.fs);
+
 	/*
 	 * Cache filter in dev struct for GET operation only if
 	 * the underlying driver doesn't have its own GET operation, and
@@ -667,19 +709,19 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 		break;
 	case IP_USER_FLOW:
 		sprintf(p, "\tSrc IP addr: 0x%x\n",
-			fsc->fs.h_u.raw_ip4_spec.ip4src);
+			fsc->fs.h_u.usr_ip4_spec.ip4src);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tSrc IP mask: 0x%x\n",
-			fsc->fs.m_u.raw_ip4_spec.ip4src);
+			fsc->fs.m_u.usr_ip4_spec.ip4src);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tDest IP addr: 0x%x\n",
-			fsc->fs.h_u.raw_ip4_spec.ip4dst);
+			fsc->fs.h_u.usr_ip4_spec.ip4dst);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tDest IP mask: 0x%x\n",
-			fsc->fs.m_u.raw_ip4_spec.ip4dst);
+			fsc->fs.m_u.usr_ip4_spec.ip4dst);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		break;
@@ -775,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	if (regs.len > reglen)
 		regs.len = reglen;
 
-	regbuf = kzalloc(reglen, GFP_USER);
+	regbuf = vmalloc(reglen);
 	if (!regbuf)
 		return -ENOMEM;
 
@@ -790,7 +832,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	ret = 0;
 
  out:
-	kfree(regbuf);
+	vfree(regbuf);
 	return ret;
 }
 
@@ -1175,8 +1217,11 @@ static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 
 	if (edata.data) {
-		if (!dev->ethtool_ops->get_rx_csum ||
-		    !dev->ethtool_ops->get_rx_csum(dev))
+		u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
+				dev->ethtool_ops->get_rx_csum(dev) :
+				ethtool_op_get_rx_csum(dev);
+
+		if (!rxcsum)
 			return -EINVAL;
 		dev->features |= NETIF_F_GRO;
 	} else
@@ -1402,14 +1447,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	if (!dev || !netif_device_present(dev))
 		return -ENODEV;
 
-	if (!dev->ethtool_ops)
-		return -EOPNOTSUPP;
-
 	if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
 		return -EFAULT;
 
+	if (!dev->ethtool_ops) {
+		/* ETHTOOL_GDRVINFO does not require any driver support.
+		 * It is also unprivileged and does not change anything,
+		 * so we can take a shortcut to it. */
+		if (ethcmd == ETHTOOL_GDRVINFO)
+			return ethtool_get_drvinfo(dev, useraddr);
+		else
+			return -EOPNOTSUPP;
+	}
+
 	/* Allow some commands to be done by anyone */
 	switch (ethcmd) {
+	case ETHTOOL_GSET:
 	case ETHTOOL_GDRVINFO:
 	case ETHTOOL_GMSGLVL:
 	case ETHTOOL_GCOALESCE:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 42e84e08a1be..82a4369ae150 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -144,7 +144,7 @@ fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
 }
 EXPORT_SYMBOL_GPL(fib_rules_register);
 
-void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
+static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 {
 	struct fib_rule *rule, *tmp;
 
@@ -153,7 +153,6 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 		fib_rule_put(rule);
 	}
 }
-EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
 static void fib_rules_put_rcu(struct rcu_head *head)
 {
@@ -182,7 +181,8 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
 	int ret = 0;
 
-	if (rule->iifindex && (rule->iifindex != fl->iif))
+	if (rule->iifindex && (rule->iifindex != fl->iif) &&
+	    !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
 		goto out;
 
 	if (rule->oifindex && (rule->oifindex != fl->oif))
@@ -225,9 +225,12 @@ jumped:
 		err = ops->action(rule, fl, flags, arg);
 
 		if (err != -EAGAIN) {
-			fib_rule_get(rule);
-			arg->rule = rule;
-			goto out;
+			if ((arg->flags & FIB_LOOKUP_NOREF) ||
+			    likely(atomic_inc_not_zero(&rule->refcnt))) {
+				arg->rule = rule;
+				goto out;
+			}
+			break;
 		}
 	}
 
@@ -348,12 +351,12 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 		list_for_each_entry(r, &ops->rules_list, list) {
 			if (r->pref == rule->target) {
-				rule->ctarget = r;
+				RCU_INIT_POINTER(rule->ctarget, r);
 				break;
 			}
 		}
 
-		if (rule->ctarget == NULL)
+		if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
 			unresolved = 1;
 	} else if (rule->action == FR_ACT_GOTO)
 		goto errout_free;
@@ -370,6 +373,11 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	fib_rule_get(rule);
 
+	if (last)
+		list_add_rcu(&rule->list, &last->list);
+	else
+		list_add_rcu(&rule->list, &ops->rules_list);
+
 	if (ops->unresolved_rules) {
 		/*
 		 * There are unresolved goto rules in the list, check if
@@ -378,7 +386,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		list_for_each_entry(r, &ops->rules_list, list) {
 			if (r->action == FR_ACT_GOTO &&
 			    r->target == rule->pref) {
-				BUG_ON(r->ctarget != NULL);
+				BUG_ON(rtnl_dereference(r->ctarget) != NULL);
 				rcu_assign_pointer(r->ctarget, rule);
 				if (--ops->unresolved_rules == 0)
 					break;
@@ -392,11 +400,6 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (unresolved)
 		ops->unresolved_rules++;
 
-	if (last)
-		list_add_rcu(&rule->list, &last->list);
-	else
-		list_add_rcu(&rule->list, &ops->rules_list);
-
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
@@ -484,14 +487,13 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		 */
 		if (ops->nr_goto_rules > 0) {
 			list_for_each_entry(tmp, &ops->rules_list, list) {
-				if (tmp->ctarget == rule) {
+				if (rtnl_dereference(tmp->ctarget) == rule) {
 					rcu_assign_pointer(tmp->ctarget, NULL);
 					ops->unresolved_rules++;
 				}
 			}
 		}
 
-		synchronize_rcu();
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);
@@ -543,7 +545,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->action = rule->action;
 	frh->flags = rule->flags;
 
-	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+	if (rule->action == FR_ACT_GOTO &&
+	    rcu_dereference_raw(rule->ctarget) == NULL)
 		frh->flags |= FIB_RULE_UNRESOLVED;
 
 	if (rule->iifname[0]) {
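
The fib_rules lookup above may race with rule deletion under RCU, which is why it takes its reference with atomic_inc_not_zero: an object whose count already reached zero is being freed and must not be resurrected. A userspace shape of that primitive with C11 atomics:

#include <stdio.h>
#include <stdatomic.h>

/* Increment v unless it is zero; returns nonzero on success.
 * This is the userspace shape of the kernel's atomic_inc_not_zero(). */
static int inc_not_zero(atomic_int *v)
{
	int old = atomic_load(v);

	do {
		if (old == 0)
			return 0;	/* object already dying, don't touch */
	} while (!atomic_compare_exchange_weak(v, &old, old + 1));
	return 1;
}

int main(void)
{
	atomic_int live = 2, dead = 0;

	printf("live: %d (now %d), dead: %d (still %d)\n",
	       inc_not_zero(&live), atomic_load(&live),
	       inc_not_zero(&dead), atomic_load(&dead));
	return 0;
}
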
diff --git a/net/core/filter.c b/net/core/filter.c
index 52b051f82a01..23e9b2a6b4c8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -89,8 +89,8 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	rcu_read_lock_bh();
 	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter) {
-		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
-				filter->len);
+		unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
 	rcu_read_unlock_bh();
@@ -112,39 +112,41 @@ EXPORT_SYMBOL(sk_filter);
  */
 unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
-	struct sock_filter *fentry;	/* We walk down these */
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
+	unsigned long memvalid = 0;
 	u32 tmp;
 	int k;
 	int pc;
 
+	BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
 	/*
 	 * Process array of filter instructions.
 	 */
 	for (pc = 0; pc < flen; pc++) {
-		fentry = &filter[pc];
+		const struct sock_filter *fentry = &filter[pc];
+		u32 f_k = fentry->k;
 
 		switch (fentry->code) {
 		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
 		case BPF_S_ALU_ADD_K:
-			A += fentry->k;
+			A += f_k;
 			continue;
 		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
 		case BPF_S_ALU_SUB_K:
-			A -= fentry->k;
+			A -= f_k;
 			continue;
 		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
 		case BPF_S_ALU_MUL_K:
-			A *= fentry->k;
+			A *= f_k;
 			continue;
 		case BPF_S_ALU_DIV_X:
 			if (X == 0)
@@ -152,49 +154,49 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			A /= X;
 			continue;
 		case BPF_S_ALU_DIV_K:
-			A /= fentry->k;
+			A /= f_k;
 			continue;
 		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
 		case BPF_S_ALU_AND_K:
-			A &= fentry->k;
+			A &= f_k;
 			continue;
 		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
 		case BPF_S_ALU_OR_K:
-			A |= fentry->k;
+			A |= f_k;
 			continue;
 		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
 		case BPF_S_ALU_LSH_K:
-			A <<= fentry->k;
+			A <<= f_k;
 			continue;
 		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
 		case BPF_S_ALU_RSH_K:
-			A >>= fentry->k;
+			A >>= f_k;
 			continue;
 		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
 		case BPF_S_JMP_JA:
-			pc += fentry->k;
+			pc += f_k;
 			continue;
 		case BPF_S_JMP_JGT_K:
-			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A > f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_K:
-			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A >= f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_K:
-			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A == f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_K:
-			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A & f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
@@ -209,7 +211,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_LD_W_ABS:
-			k = fentry->k;
+			k = f_k;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
@@ -218,7 +220,7 @@ load_w:
 			}
 			break;
 		case BPF_S_LD_H_ABS:
-			k = fentry->k;
+			k = f_k;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
@@ -227,7 +229,7 @@ load_h:
 			}
 			break;
 		case BPF_S_LD_B_ABS:
-			k = fentry->k;
+			k = f_k;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
 			if (ptr != NULL) {
@@ -242,32 +244,34 @@ load_b:
 			X = skb->len;
 			continue;
 		case BPF_S_LD_W_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_w;
 		case BPF_S_LD_H_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_h;
 		case BPF_S_LD_B_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_b;
 		case BPF_S_LDX_B_MSH:
-			ptr = load_pointer(skb, fentry->k, 1, &tmp);
+			ptr = load_pointer(skb, f_k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
 		case BPF_S_LD_IMM:
-			A = fentry->k;
+			A = f_k;
 			continue;
 		case BPF_S_LDX_IMM:
-			X = fentry->k;
+			X = f_k;
 			continue;
 		case BPF_S_LD_MEM:
-			A = mem[fentry->k];
+			A = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_LDX_MEM:
-			X = mem[fentry->k];
+			X = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -276,14 +280,16 @@ load_b:
 			A = X;
 			continue;
 		case BPF_S_RET_K:
-			return fentry->k;
+			return f_k;
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			mem[fentry->k] = A;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = A;
 			continue;
 		case BPF_S_STX:
-			mem[fentry->k] = X;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = X;
 			continue;
 		default:
 			WARN_ON(1);
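
The memvalid word closes an information leak: a filter could previously load a scratch slot it never stored to and observe uninitialized kernel stack. With the bitmap, loads from unwritten slots read as zero. A reduced userspace model of the store/load pair (BPF_MEMWORDS is 16 in the kernel):

#include <stdio.h>
#include <stdint.h>

#define MEMWORDS 16		/* matches BPF_MEMWORDS */

static uint32_t mem[MEMWORDS];	/* deliberately not zeroed in the kernel */
static unsigned long memvalid;	/* bit n set => mem[n] was stored */

static void st(unsigned int k, uint32_t v)
{
	memvalid |= 1UL << k;
	mem[k] = v;
}

static uint32_t ld(unsigned int k)
{
	/* unwritten slots read back as 0 instead of stack garbage */
	return (memvalid & (1UL << k)) ? mem[k] : 0;
}

int main(void)
{
	st(3, 0xdeadbeef);
	printf("mem[3]=0x%x mem[7]=0x%x\n", ld(3), ld(7));
	return 0;
}
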
@@ -638,10 +644,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 		return err;
 	}
 
-	rcu_read_lock_bh();
-	old_fp = rcu_dereference_bh(sk->sk_filter);
+	old_fp = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
 	rcu_assign_pointer(sk->sk_filter, fp);
-	rcu_read_unlock_bh();
 
 	if (old_fp)
 		sk_filter_delayed_uncharge(sk, old_fp);
@@ -654,14 +659,13 @@ int sk_detach_filter(struct sock *sk)
 	int ret = -ENOENT;
 	struct sk_filter *filter;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	filter = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
 		sk_filter_delayed_uncharge(sk, filter);
 		ret = 0;
 	}
-	rcu_read_unlock_bh();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
index f67dcbfe54ef..127c8a7ffd61 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -53,8 +53,7 @@ struct flow_flush_info {
 
 struct flow_cache {
 	u32			hash_shift;
-	unsigned long		order;
-	struct flow_cache_percpu	*percpu;
+	struct flow_cache_percpu __percpu *percpu;
 	struct notifier_block	hotcpu_notifier;
 	int			low_watermark;
 	int			high_watermark;
@@ -64,7 +63,7 @@ struct flow_cache {
 atomic_t flow_cache_genid = ATOMIC_INIT(0);
 EXPORT_SYMBOL(flow_cache_genid);
 static struct flow_cache flow_cache_global;
-static struct kmem_cache *flow_cachep;
+static struct kmem_cache *flow_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(flow_cache_gc_lock);
 static LIST_HEAD(flow_cache_gc_list);
@@ -177,15 +176,11 @@ static u32 flow_hash_code(struct flow_cache *fc,
 {
 	u32 *k = (u32 *) key;
 
-	return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
-		& (flow_cache_hash_size(fc) - 1));
+	return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+		& (flow_cache_hash_size(fc) - 1);
 }
 
-#if (BITS_PER_LONG == 64)
-typedef u64 flow_compare_t;
-#else
-typedef u32 flow_compare_t;
-#endif
+typedef unsigned long flow_compare_t;
 
 /* I hear what you're saying, use memcmp.  But memcmp cannot make
  * important assumptions that we can here, such as alignment and
@@ -357,62 +352,73 @@ void flow_cache_flush(void)
 	put_online_cpus();
 }
 
-static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
-					  struct flow_cache_percpu *fcp)
+static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 {
-	fcp->hash_table = (struct hlist_head *)
-		__get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
-	if (!fcp->hash_table)
-		panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+	size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
 
-	fcp->hash_rnd_recalc = 1;
-	fcp->hash_count = 0;
-	tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+	if (!fcp->hash_table) {
+		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
+		if (!fcp->hash_table) {
+			pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+			return -ENOMEM;
+		}
+		fcp->hash_rnd_recalc = 1;
+		fcp->hash_count = 0;
+		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+	}
+	return 0;
 }
 
-static int flow_cache_cpu(struct notifier_block *nfb,
+static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
 			  unsigned long action,
 			  void *hcpu)
 {
 	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
-	int cpu = (unsigned long) hcpu;
+	int res, cpu = (unsigned long) hcpu;
 	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
 
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		res = flow_cache_cpu_prepare(fc, cpu);
+		if (res)
+			return notifier_from_errno(res);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		__flow_cache_shrink(fc, fcp, 0);
+		break;
+	}
 	return NOTIFY_OK;
 }
 
-static int flow_cache_init(struct flow_cache *fc)
+static int __init flow_cache_init(struct flow_cache *fc)
 {
-	unsigned long order;
 	int i;
 
 	fc->hash_shift = 10;
 	fc->low_watermark = 2 * flow_cache_hash_size(fc);
 	fc->high_watermark = 4 * flow_cache_hash_size(fc);
 
-	for (order = 0;
-	     (PAGE_SIZE << order) <
-		     (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
-	     order++)
-		/* NOTHING */;
-	fc->order = order;
 	fc->percpu = alloc_percpu(struct flow_cache_percpu);
+	if (!fc->percpu)
+		return -ENOMEM;
 
-	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-		    (unsigned long) fc);
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-
-	for_each_possible_cpu(i)
-		flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
-
+	for_each_online_cpu(i) {
+		if (flow_cache_cpu_prepare(fc, i))
+			return -ENOMEM;
+	}
 	fc->hotcpu_notifier = (struct notifier_block){
 		.notifier_call = flow_cache_cpu,
 	};
 	register_hotcpu_notifier(&fc->hotcpu_notifier);
 
+	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+		    (unsigned long) fc);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
+
 	return 0;
 }
 
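
flow_cache_cpu_prepare is now idempotent and failure-tolerant: each CPU's bucket array is allocated on demand (on that CPU's NUMA node in the kernel), a repeat call after a CPU comes back online is a no-op, and allocation failure is reported instead of panicking. A userspace model of the idempotent prepare step:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4
#define HASH_SIZE 1024

struct percpu_cache { void *hash_table; };

static struct percpu_cache caches[NR_CPUS];

/* Idempotent: a CPU that goes offline and comes back keeps its table. */
static int cpu_prepare(int cpu)
{
	struct percpu_cache *fcp = &caches[cpu];

	if (!fcp->hash_table) {
		fcp->hash_table = calloc(HASH_SIZE, sizeof(void *));
		if (!fcp->hash_table)
			return -1;	/* report failure, don't panic */
	}
	return 0;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_prepare(cpu))
			return 1;
	cpu_prepare(2);			/* second call is a no-op */
	printf("cpu2 table %p\n", caches[2].hash_table);
	return 0;
}
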
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6743146e4d6b..7c2373321b74 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -274,9 +274,9 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
-		write_lock_bh(&est_lock);
+		write_lock(&est_lock);
 		e->bstats = NULL;
-		write_unlock_bh(&est_lock);
+		write_unlock(&est_lock);
 
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index e6b133b77ccb..c40f27e7d208 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,14 +35,15 @@
  *	in any case.
  */
 
-long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
 {
-	int size, ct;
-	long err;
+	int size, ct, err;
 
 	if (m->msg_namelen) {
 		if (mode == VERIFY_READ) {
-			err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
+			void __user *namep;
+			namep = (void __user __force *) m->msg_name;
+			err = move_addr_to_kernel(namep, m->msg_namelen,
 						  address);
 			if (err < 0)
 				return err;
@@ -53,21 +54,20 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
 	}
 
 	size = m->msg_iovlen * sizeof(struct iovec);
-	if (copy_from_user(iov, m->msg_iov, size))
+	if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
 		return -EFAULT;
 
 	m->msg_iov = iov;
 	err = 0;
 
 	for (ct = 0; ct < m->msg_iovlen; ct++) {
-		err += iov[ct].iov_len;
-		/*
-		 * Goal is not to verify user data, but to prevent returning
-		 * negative value, which is interpreted as errno.
-		 * Overflow is still possible, but it is harmless.
-		 */
-		if (err < 0)
-			return -EMSGSIZE;
+		size_t len = iov[ct].iov_len;
+
+		if (len > INT_MAX - err) {
+			len = INT_MAX - err;
+			iov[ct].iov_len = len;
+		}
+		err += len;
 	}
 
 	return err;
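
The rewritten loop avoids the old signed-overflow test (undefined behavior, and the removed comment conceded the wrap was merely assumed harmless) by clamping: each iov_len is truncated so the running total never exceeds INT_MAX, and the truncation is written back so later copies cannot outrun the accounted total. A self-contained model:

#include <stdio.h>
#include <limits.h>
#include <stddef.h>

/* Sum iovec lengths, truncating so the total fits in an int.
 * Lengths beyond the cap are cut back in the array itself, so later
 * copy loops cannot run past what was accounted for. */
static int sum_lengths(size_t *lens, int n)
{
	int err = 0, ct;

	for (ct = 0; ct < n; ct++) {
		size_t len = lens[ct];

		if (len > (size_t)(INT_MAX - err)) {
			len = INT_MAX - err;
			lens[ct] = len;
		}
		err += len;
	}
	return err;
}

int main(void)
{
	size_t lens[] = { 100, (size_t)INT_MAX, 50 };

	printf("total = %d (second len clamped to %zu)\n",
	       sum_lengths(lens, 3), lens[1]);
	return 0;
}
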
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a4e0a7482c2b..8cc8f9a79db9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
 
 unsigned long neigh_rand_reach_time(unsigned long base)
 {
-	return (base ? (net_random() % base) + (base >> 1) : 0);
+	return base ? (net_random() % base) + (base >> 1) : 0;
 }
 EXPORT_SYMBOL(neigh_rand_reach_time);
 
@@ -131,15 +131,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 {
 	int shrunk = 0;
 	int i;
+	struct neigh_hash_table *nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
 
 	write_lock_bh(&tbl->lock);
-	for (i = 0; i <= tbl->hash_mask; i++) {
-		struct neighbour *n, **np;
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
+	for (i = 0; i <= nht->hash_mask; i++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np;
 
-		np = &tbl->hash_buckets[i];
-		while ((n = *np) != NULL) {
+		np = &nht->hash_buckets[i];
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
 			/* Neighbour record may be discarded if:
 			 * - nobody refers to it.
 			 * - it is not permanent
@@ -147,7 +152,9 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 			write_lock(&n->lock);
 			if (atomic_read(&n->refcnt) == 1 &&
 			    !(n->nud_state & NUD_PERMANENT)) {
-				*np = n->next;
+				rcu_assign_pointer(*np,
+					rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock)));
 				n->dead = 1;
 				shrunk	= 1;
 				write_unlock(&n->lock);
@@ -199,16 +206,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 {
 	int i;
+	struct neigh_hash_table *nht;
 
-	for (i = 0; i <= tbl->hash_mask; i++) {
-		struct neighbour *n, **np = &tbl->hash_buckets[i];
+	nht = rcu_dereference_protected(tbl->nht,
+					lockdep_is_held(&tbl->lock));
 
-		while ((n = *np) != NULL) {
+	for (i = 0; i <= nht->hash_mask; i++) {
+		struct neighbour *n;
+		struct neighbour __rcu **np = &nht->hash_buckets[i];
+
+		while ((n = rcu_dereference_protected(*np,
+					lockdep_is_held(&tbl->lock))) != NULL) {
 			if (dev && n->dev != dev) {
 				np = &n->next;
 				continue;
 			}
-			*np = n->next;
+			rcu_assign_pointer(*np,
+				   rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock)));
 			write_lock(&n->lock);
 			neigh_del_timer(n);
 			n->dead = 1;
@@ -279,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 
 	skb_queue_head_init(&n->arp_queue);
 	rwlock_init(&n->lock);
+	seqlock_init(&n->ha_lock);
 	n->updated	  = n->used = now;
 	n->nud_state	  = NUD_NONE;
 	n->output	  = neigh_blackhole;
@@ -297,64 +313,86 @@ out_entries:
 	goto out;
 }
 
-static struct neighbour **neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 {
-	unsigned long size = entries * sizeof(struct neighbour *);
-	struct neighbour **ret;
+	size_t size = entries * sizeof(struct neighbour *);
+	struct neigh_hash_table *ret;
+	struct neighbour **buckets;
 
-	if (size <= PAGE_SIZE) {
-		ret = kzalloc(size, GFP_ATOMIC);
-	} else {
-		ret = (struct neighbour **)
-		      __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
+	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
+	if (!ret)
+		return NULL;
+	if (size <= PAGE_SIZE)
+		buckets = kzalloc(size, GFP_ATOMIC);
+	else
+		buckets = (struct neighbour **)
+			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+					   get_order(size));
+	if (!buckets) {
+		kfree(ret);
+		return NULL;
 	}
+	rcu_assign_pointer(ret->hash_buckets, buckets);
+	ret->hash_mask = entries - 1;
+	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
 	return ret;
 }
 
-static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+static void neigh_hash_free_rcu(struct rcu_head *head)
 {
-	unsigned long size = entries * sizeof(struct neighbour *);
+	struct neigh_hash_table *nht = container_of(head,
+						    struct neigh_hash_table,
+						    rcu);
+	size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+	struct neighbour **buckets = nht->hash_buckets;
 
 	if (size <= PAGE_SIZE)
-		kfree(hash);
+		kfree(buckets);
 	else
-		free_pages((unsigned long)hash, get_order(size));
+		free_pages((unsigned long)buckets, get_order(size));
+	kfree(nht);
 }
 
-static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
+						unsigned long new_entries)
 {
-	struct neighbour **new_hash, **old_hash;
-	unsigned int i, new_hash_mask, old_entries;
+	unsigned int i, hash;
+	struct neigh_hash_table *new_nht, *old_nht;
 
 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
 
 	BUG_ON(!is_power_of_2(new_entries));
-	new_hash = neigh_hash_alloc(new_entries);
-	if (!new_hash)
-		return;
-
-	old_entries = tbl->hash_mask + 1;
-	new_hash_mask = new_entries - 1;
-	old_hash = tbl->hash_buckets;
+	old_nht = rcu_dereference_protected(tbl->nht,
+					    lockdep_is_held(&tbl->lock));
+	new_nht = neigh_hash_alloc(new_entries);
+	if (!new_nht)
+		return old_nht;
 
-	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
-	for (i = 0; i < old_entries; i++) {
+	for (i = 0; i <= old_nht->hash_mask; i++) {
 		struct neighbour *n, *next;
 
-		for (n = old_hash[i]; n; n = next) {
-			unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
-
-			hash_val &= new_hash_mask;
-			next = n->next;
-
-			n->next = new_hash[hash_val];
-			new_hash[hash_val] = n;
+		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
+						   lockdep_is_held(&tbl->lock));
+		     n != NULL;
+		     n = next) {
+			hash = tbl->hash(n->primary_key, n->dev,
+					 new_nht->hash_rnd);
+
+			hash &= new_nht->hash_mask;
+			next = rcu_dereference_protected(n->next,
+						lockdep_is_held(&tbl->lock));
+
+			rcu_assign_pointer(n->next,
+					   rcu_dereference_protected(
+						new_nht->hash_buckets[hash],
+						lockdep_is_held(&tbl->lock)));
+			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
 		}
 	}
-	tbl->hash_buckets = new_hash;
-	tbl->hash_mask = new_hash_mask;
 
-	neigh_hash_free(old_hash, old_entries);
+	rcu_assign_pointer(tbl->nht, new_nht);
+	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
+	return new_nht;
 }
 
 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
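
neigh_hash_grow allocates a complete replacement table (with a fresh hash_rnd), relinks every entry under the table lock, publishes the new table with rcu_assign_pointer, and defers freeing the old buckets via call_rcu until readers drain. A userspace sketch of just the relinking step, with the per-table hash function simplified to key & mask and no RCU:

#include <stdio.h>
#include <stdlib.h>

struct node { unsigned int key; struct node *next; };

/* Move every node from old_tbl (old_mask+1 buckets) into new_tbl,
 * rehashing with the new mask; the nodes themselves are reused. */
static void rehash(struct node **old_tbl, unsigned int old_mask,
		   struct node **new_tbl, unsigned int new_mask)
{
	unsigned int i;

	for (i = 0; i <= old_mask; i++) {
		struct node *n, *next;

		for (n = old_tbl[i]; n; n = next) {
			unsigned int hash = n->key & new_mask;

			next = n->next;
			n->next = new_tbl[hash];	/* push onto new bucket */
			new_tbl[hash] = n;
		}
	}
}

int main(void)
{
	struct node a = { 5, NULL }, b = { 13, &a };
	struct node *old_tbl[8] = { [5] = &b };	/* 5 and 13 collide mod 8 */
	struct node *new_tbl[16] = { 0 };

	rehash(old_tbl, 7, new_tbl, 15);
	printf("bucket5=%u bucket13=%u\n",
	       new_tbl[5]->key, new_tbl[13]->key);
	return 0;
}
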
@@ -363,19 +401,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
363 struct neighbour *n; 401 struct neighbour *n;
364 int key_len = tbl->key_len; 402 int key_len = tbl->key_len;
365 u32 hash_val; 403 u32 hash_val;
404 struct neigh_hash_table *nht;
366 405
367 NEIGH_CACHE_STAT_INC(tbl, lookups); 406 NEIGH_CACHE_STAT_INC(tbl, lookups);
368 407
369 read_lock_bh(&tbl->lock); 408 rcu_read_lock_bh();
370 hash_val = tbl->hash(pkey, dev); 409 nht = rcu_dereference_bh(tbl->nht);
371 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { 410 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
411
412 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
413 n != NULL;
414 n = rcu_dereference_bh(n->next)) {
372 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { 415 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
373 neigh_hold(n); 416 if (!atomic_inc_not_zero(&n->refcnt))
417 n = NULL;
374 NEIGH_CACHE_STAT_INC(tbl, hits); 418 NEIGH_CACHE_STAT_INC(tbl, hits);
375 break; 419 break;
376 } 420 }
377 } 421 }
378 read_unlock_bh(&tbl->lock); 422
423 rcu_read_unlock_bh();
379 return n; 424 return n;
380} 425}
381EXPORT_SYMBOL(neigh_lookup); 426EXPORT_SYMBOL(neigh_lookup);
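With tbl->lock gone from the lookup, an entry can reach refcount zero while a reader is still walking the chain it sits on, so neigh_hold() is replaced by atomic_inc_not_zero(): take a reference only if the object is not already being torn down. A runnable C11 userspace model of that primitive:

	#include <stdatomic.h>
	#include <stdbool.h>

	static bool ref_get_not_zero(atomic_int *refcnt)
	{
		int old = atomic_load(refcnt);

		while (old != 0) {
			/* on failure the CAS reloads 'old'; just retry */
			if (atomic_compare_exchange_weak(refcnt, &old,
							 old + 1))
				return true;	/* reference taken */
		}
		return false;	/* object is dying; treat as a miss */
	}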
@@ -386,20 +431,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
386 struct neighbour *n; 431 struct neighbour *n;
387 int key_len = tbl->key_len; 432 int key_len = tbl->key_len;
388 u32 hash_val; 433 u32 hash_val;
434 struct neigh_hash_table *nht;
389 435
390 NEIGH_CACHE_STAT_INC(tbl, lookups); 436 NEIGH_CACHE_STAT_INC(tbl, lookups);
391 437
392 read_lock_bh(&tbl->lock); 438 rcu_read_lock_bh();
393 hash_val = tbl->hash(pkey, NULL); 439 nht = rcu_dereference_bh(tbl->nht);
394 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { 440 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
441
442 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
443 n != NULL;
444 n = rcu_dereference_bh(n->next)) {
395 if (!memcmp(n->primary_key, pkey, key_len) && 445 if (!memcmp(n->primary_key, pkey, key_len) &&
396 net_eq(dev_net(n->dev), net)) { 446 net_eq(dev_net(n->dev), net)) {
397 neigh_hold(n); 447 if (!atomic_inc_not_zero(&n->refcnt))
448 n = NULL;
398 NEIGH_CACHE_STAT_INC(tbl, hits); 449 NEIGH_CACHE_STAT_INC(tbl, hits);
399 break; 450 break;
400 } 451 }
401 } 452 }
402 read_unlock_bh(&tbl->lock); 453
454 rcu_read_unlock_bh();
403 return n; 455 return n;
404} 456}
405EXPORT_SYMBOL(neigh_lookup_nodev); 457EXPORT_SYMBOL(neigh_lookup_nodev);
@@ -411,6 +463,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
411 int key_len = tbl->key_len; 463 int key_len = tbl->key_len;
412 int error; 464 int error;
413 struct neighbour *n1, *rc, *n = neigh_alloc(tbl); 465 struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
466 struct neigh_hash_table *nht;
414 467
415 if (!n) { 468 if (!n) {
416 rc = ERR_PTR(-ENOBUFS); 469 rc = ERR_PTR(-ENOBUFS);
@@ -437,18 +490,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
437 n->confirmed = jiffies - (n->parms->base_reachable_time << 1); 490 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
438 491
439 write_lock_bh(&tbl->lock); 492 write_lock_bh(&tbl->lock);
493 nht = rcu_dereference_protected(tbl->nht,
494 lockdep_is_held(&tbl->lock));
440 495
441 if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) 496 if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
442 neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); 497 nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
443 498
444 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; 499 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
445 500
446 if (n->parms->dead) { 501 if (n->parms->dead) {
447 rc = ERR_PTR(-EINVAL); 502 rc = ERR_PTR(-EINVAL);
448 goto out_tbl_unlock; 503 goto out_tbl_unlock;
449 } 504 }
450 505
451 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { 506 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
507 lockdep_is_held(&tbl->lock));
508 n1 != NULL;
509 n1 = rcu_dereference_protected(n1->next,
510 lockdep_is_held(&tbl->lock))) {
452 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { 511 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
453 neigh_hold(n1); 512 neigh_hold(n1);
454 rc = n1; 513 rc = n1;
@@ -456,10 +515,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
456 } 515 }
457 } 516 }
458 517
459 n->next = tbl->hash_buckets[hash_val];
460 tbl->hash_buckets[hash_val] = n;
461 n->dead = 0; 518 n->dead = 0;
462 neigh_hold(n); 519 neigh_hold(n);
520 rcu_assign_pointer(n->next,
521 rcu_dereference_protected(nht->hash_buckets[hash_val],
522 lockdep_is_held(&tbl->lock)));
523 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
463 write_unlock_bh(&tbl->lock); 524 write_unlock_bh(&tbl->lock);
464 NEIGH_PRINTK2("neigh %p is created.\n", n); 525 NEIGH_PRINTK2("neigh %p is created.\n", n);
465 rc = n; 526 rc = n;
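The ordering of the insertion above is what keeps lockless readers safe: the neighbour is fully initialized first, then linked to the current chain head, and only then does the bucket pointer flip to it; rcu_assign_pointer() provides the write barrier so a reader can never chase a pointer into half-initialized memory. Boiled down (here bucket stands for &nht->hash_buckets[hash_val]; the real code uses rcu_assign_pointer() for step 3 as well, for sparse's benefit):

	n->dead = 0;			/* 1. finish initializing n       */
	neigh_hold(n);			/* 2. reference held by the table */
	n->next = old_head;		/* 3. link in front of the chain  */
	rcu_assign_pointer(*bucket, n);	/* 4. publish, barrier included   */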
@@ -616,6 +677,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
616 neigh_parms_destroy(parms); 677 neigh_parms_destroy(parms);
617} 678}
618 679
680static void neigh_destroy_rcu(struct rcu_head *head)
681{
682 struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683
684 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685}
619/* 686/*
620 * neighbour must already be out of the table; 687 * neighbour must already be out of the table;
621 * 688 *
@@ -643,8 +710,7 @@ void neigh_destroy(struct neighbour *neigh)
643 write_seqlock_bh(&hh->hh_lock); 710 write_seqlock_bh(&hh->hh_lock);
644 hh->hh_output = neigh_blackhole; 711 hh->hh_output = neigh_blackhole;
645 write_sequnlock_bh(&hh->hh_lock); 712 write_sequnlock_bh(&hh->hh_lock);
646 if (atomic_dec_and_test(&hh->hh_refcnt)) 713 hh_cache_put(hh);
647 kfree(hh);
648 } 714 }
649 715
650 skb_queue_purge(&neigh->arp_queue); 716 skb_queue_purge(&neigh->arp_queue);
@@ -655,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh)
655 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); 721 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
656 722
657 atomic_dec(&neigh->tbl->entries); 723 atomic_dec(&neigh->tbl->entries);
658 kmem_cache_free(neigh->tbl->kmem_cachep, neigh); 724 call_rcu(&neigh->rcu, neigh_destroy_rcu);
659} 725}
660EXPORT_SYMBOL(neigh_destroy); 726EXPORT_SYMBOL(neigh_destroy);
661 727
@@ -696,12 +762,16 @@ static void neigh_connect(struct neighbour *neigh)
696static void neigh_periodic_work(struct work_struct *work) 762static void neigh_periodic_work(struct work_struct *work)
697{ 763{
698 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); 764 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
699 struct neighbour *n, **np; 765 struct neighbour *n;
766 struct neighbour __rcu **np;
700 unsigned int i; 767 unsigned int i;
768 struct neigh_hash_table *nht;
701 769
702 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); 770 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
703 771
704 write_lock_bh(&tbl->lock); 772 write_lock_bh(&tbl->lock);
773 nht = rcu_dereference_protected(tbl->nht,
774 lockdep_is_held(&tbl->lock));
705 775
706 /* 776 /*
707 * periodically recompute ReachableTime from random function 777 * periodically recompute ReachableTime from random function
@@ -715,10 +785,11 @@ static void neigh_periodic_work(struct work_struct *work)
715 neigh_rand_reach_time(p->base_reachable_time); 785 neigh_rand_reach_time(p->base_reachable_time);
716 } 786 }
717 787
718 for (i = 0 ; i <= tbl->hash_mask; i++) { 788 for (i = 0 ; i <= nht->hash_mask; i++) {
719 np = &tbl->hash_buckets[i]; 789 np = &nht->hash_buckets[i];
720 790
721 while ((n = *np) != NULL) { 791 while ((n = rcu_dereference_protected(*np,
792 lockdep_is_held(&tbl->lock))) != NULL) {
722 unsigned int state; 793 unsigned int state;
723 794
724 write_lock(&n->lock); 795 write_lock(&n->lock);
@@ -766,9 +837,9 @@ next_elt:
766static __inline__ int neigh_max_probes(struct neighbour *n) 837static __inline__ int neigh_max_probes(struct neighbour *n)
767{ 838{
768 struct neigh_parms *p = n->parms; 839 struct neigh_parms *p = n->parms;
769 return (n->nud_state & NUD_PROBE ? 840 return (n->nud_state & NUD_PROBE) ?
770 p->ucast_probes : 841 p->ucast_probes :
771 p->ucast_probes + p->app_probes + p->mcast_probes); 842 p->ucast_probes + p->app_probes + p->mcast_probes;
772} 843}
773 844
774static void neigh_invalidate(struct neighbour *neigh) 845static void neigh_invalidate(struct neighbour *neigh)
@@ -945,7 +1016,7 @@ out_unlock_bh:
945} 1016}
946EXPORT_SYMBOL(__neigh_event_send); 1017EXPORT_SYMBOL(__neigh_event_send);
947 1018
948static void neigh_update_hhs(struct neighbour *neigh) 1019static void neigh_update_hhs(const struct neighbour *neigh)
949{ 1020{
950 struct hh_cache *hh; 1021 struct hh_cache *hh;
951 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 1022 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1081,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1081 } 1152 }
1082 1153
1083 if (lladdr != neigh->ha) { 1154 if (lladdr != neigh->ha) {
1155 write_seqlock(&neigh->ha_lock);
1084 memcpy(&neigh->ha, lladdr, dev->addr_len); 1156 memcpy(&neigh->ha, lladdr, dev->addr_len);
1157 write_sequnlock(&neigh->ha_lock);
1085 neigh_update_hhs(neigh); 1158 neigh_update_hhs(neigh);
1086 if (!(new & NUD_CONNECTED)) 1159 if (!(new & NUD_CONNECTED))
1087 neigh->confirmed = jiffies - 1160 neigh->confirmed = jiffies -
@@ -1139,44 +1212,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1139} 1212}
1140EXPORT_SYMBOL(neigh_event_ns); 1213EXPORT_SYMBOL(neigh_event_ns);
1141 1214
1215static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
1216 __be16 protocol)
1217{
1218 struct hh_cache *hh;
1219
1220 smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
1221 for (hh = n->hh; hh; hh = hh->hh_next) {
1222 if (hh->hh_type == protocol) {
1223 atomic_inc(&hh->hh_refcnt);
1224 if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1225 hh_cache_put(hh);
1226 return true;
1227 }
1228 }
1229 return false;
1230}
1231
1232/* called with read_lock_bh(&n->lock); */
1142static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, 1233static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1143 __be16 protocol) 1234 __be16 protocol)
1144{ 1235{
1145 struct hh_cache *hh; 1236 struct hh_cache *hh;
1146 struct net_device *dev = dst->dev; 1237 struct net_device *dev = dst->dev;
1147 1238
1148 for (hh = n->hh; hh; hh = hh->hh_next) 1239 if (likely(neigh_hh_lookup(n, dst, protocol)))
1149 if (hh->hh_type == protocol) 1240 return;
1150 break;
1151 1241
1152 if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { 1242 /* slow path */
1153 seqlock_init(&hh->hh_lock); 1243 hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
1154 hh->hh_type = protocol; 1244 if (!hh)
1155 atomic_set(&hh->hh_refcnt, 0); 1245 return;
1156 hh->hh_next = NULL;
1157 1246
1158 if (dev->header_ops->cache(n, hh)) { 1247 seqlock_init(&hh->hh_lock);
1159 kfree(hh); 1248 hh->hh_type = protocol;
1160 hh = NULL; 1249 atomic_set(&hh->hh_refcnt, 2);
1161 } else { 1250
1162 atomic_inc(&hh->hh_refcnt); 1251 if (dev->header_ops->cache(n, hh)) {
1163 hh->hh_next = n->hh; 1252 kfree(hh);
1164 n->hh = hh; 1253 return;
1165 if (n->nud_state & NUD_CONNECTED)
1166 hh->hh_output = n->ops->hh_output;
1167 else
1168 hh->hh_output = n->ops->output;
1169 }
1170 } 1254 }
1171 if (hh) { 1255
1172 atomic_inc(&hh->hh_refcnt); 1256 write_lock_bh(&n->lock);
1173 dst->hh = hh; 1257
1258 /* must check if another thread already did the insert */
1259 if (neigh_hh_lookup(n, dst, protocol)) {
1260 kfree(hh);
1261 goto end;
1174 } 1262 }
1263
1264 if (n->nud_state & NUD_CONNECTED)
1265 hh->hh_output = n->ops->hh_output;
1266 else
1267 hh->hh_output = n->ops->output;
1268
1269 hh->hh_next = n->hh;
1270 smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
1271 n->hh = hh;
1272
1273 if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1274 hh_cache_put(hh);
1275end:
1276 write_unlock_bh(&n->lock);
1175} 1277}
1176 1278
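neigh_hh_lookup() attaches the cached header to the dst without any dst-side lock: take a reference first, try to install the pointer with cmpxchg(), and give the reference back if another CPU won the race. That is also why the slow path now initializes hh_refcnt to 2 rather than incrementing it twice: one reference for the neighbour's n->hh list, one for the dst that triggered the allocation. The two-step shape:

	atomic_inc(&hh->hh_refcnt);		/* ref on behalf of dst->hh */
	if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
		hh_cache_put(hh);		/* lost the race: undo ref */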
1177/* This function can be used in contexts, where only old dev_queue_xmit 1279/* This function can be used in contexts, where only old dev_queue_xmit
1178 worked, f.e. if you want to override normal output path (eql, shaper), 1280 * worked, f.e. if you want to override normal output path (eql, shaper),
1179 but resolution is not made yet. 1281 * but resolution is not made yet.
1180 */ 1282 */
1181 1283
1182int neigh_compat_output(struct sk_buff *skb) 1284int neigh_compat_output(struct sk_buff *skb)
@@ -1210,19 +1312,19 @@ int neigh_resolve_output(struct sk_buff *skb)
1210 if (!neigh_event_send(neigh, skb)) { 1312 if (!neigh_event_send(neigh, skb)) {
1211 int err; 1313 int err;
1212 struct net_device *dev = neigh->dev; 1314 struct net_device *dev = neigh->dev;
1213 if (dev->header_ops->cache && !dst->hh) { 1315 unsigned int seq;
1214 write_lock_bh(&neigh->lock); 1316
1215 if (!dst->hh) 1317 if (dev->header_ops->cache &&
1216 neigh_hh_init(neigh, dst, dst->ops->protocol); 1318 !dst->hh &&
1217 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1319 !(dst->flags & DST_NOCACHE))
1218 neigh->ha, NULL, skb->len); 1320 neigh_hh_init(neigh, dst, dst->ops->protocol);
1219 write_unlock_bh(&neigh->lock); 1321
1220 } else { 1322 do {
1221 read_lock_bh(&neigh->lock); 1323 seq = read_seqbegin(&neigh->ha_lock);
1222 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1324 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1223 neigh->ha, NULL, skb->len); 1325 neigh->ha, NULL, skb->len);
1224 read_unlock_bh(&neigh->lock); 1326 } while (read_seqretry(&neigh->ha_lock, seq));
1225 } 1327
1226 if (err >= 0) 1328 if (err >= 0)
1227 rc = neigh->ops->queue_xmit(skb); 1329 rc = neigh->ops->queue_xmit(skb);
1228 else 1330 else
@@ -1248,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
1248 struct dst_entry *dst = skb_dst(skb); 1350 struct dst_entry *dst = skb_dst(skb);
1249 struct neighbour *neigh = dst->neighbour; 1351 struct neighbour *neigh = dst->neighbour;
1250 struct net_device *dev = neigh->dev; 1352 struct net_device *dev = neigh->dev;
1353 unsigned int seq;
1251 1354
1252 __skb_pull(skb, skb_network_offset(skb)); 1355 __skb_pull(skb, skb_network_offset(skb));
1253 1356
1254 read_lock_bh(&neigh->lock); 1357 do {
1255 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1358 seq = read_seqbegin(&neigh->ha_lock);
1256 neigh->ha, NULL, skb->len); 1359 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1257 read_unlock_bh(&neigh->lock); 1360 neigh->ha, NULL, skb->len);
1361 } while (read_seqretry(&neigh->ha_lock, seq));
1362
1258 if (err >= 0) 1363 if (err >= 0)
1259 err = neigh->ops->queue_xmit(skb); 1364 err = neigh->ops->queue_xmit(skb);
1260 else { 1365 else {
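Both transmit paths above now copy the hardware address under a seqlock instead of read_lock_bh(&neigh->lock): the reader never blocks, it simply retries the copy if read_seqretry() reports that the write_seqlock(&neigh->ha_lock) section added to neigh_update() earlier raced with it. The retry loop's shape, as it appears in both hunks:

	unsigned int seq;
	int err;

	do {
		seq = read_seqbegin(&neigh->ha_lock);	/* snapshot counter */
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));	/* raced? redo */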
@@ -1436,17 +1541,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1436 panic("cannot create neighbour proc dir entry"); 1541 panic("cannot create neighbour proc dir entry");
1437#endif 1542#endif
1438 1543
1439 tbl->hash_mask = 1; 1544 tbl->nht = neigh_hash_alloc(8);
1440 tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
1441 1545
1442 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); 1546 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1443 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); 1547 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1444 1548
1445 if (!tbl->hash_buckets || !tbl->phash_buckets) 1549 if (!tbl->nht || !tbl->phash_buckets)
1446 panic("cannot allocate neighbour cache hashes"); 1550 panic("cannot allocate neighbour cache hashes");
1447 1551
1448 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1449
1450 rwlock_init(&tbl->lock); 1552 rwlock_init(&tbl->lock);
1451 INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); 1553 INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1452 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); 1554 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
@@ -1486,8 +1588,7 @@ int neigh_table_clear(struct neigh_table *tbl)
1486 struct neigh_table **tp; 1588 struct neigh_table **tp;
1487 1589
1488 /* It is not clean... Fix it to unload IPv6 module safely */ 1590 /* It is not clean... Fix it to unload IPv6 module safely */
1489 cancel_delayed_work(&tbl->gc_work); 1591 cancel_delayed_work_sync(&tbl->gc_work);
1490 flush_scheduled_work();
1491 del_timer_sync(&tbl->proxy_timer); 1592 del_timer_sync(&tbl->proxy_timer);
1492 pneigh_queue_purge(&tbl->proxy_queue); 1593 pneigh_queue_purge(&tbl->proxy_queue);
1493 neigh_ifdown(tbl, NULL); 1594 neigh_ifdown(tbl, NULL);
@@ -1502,8 +1603,8 @@ int neigh_table_clear(struct neigh_table *tbl)
1502 } 1603 }
1503 write_unlock(&neigh_tbl_lock); 1604 write_unlock(&neigh_tbl_lock);
1504 1605
1505 neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); 1606 call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
1506 tbl->hash_buckets = NULL; 1607 tbl->nht = NULL;
1507 1608
1508 kfree(tbl->phash_buckets); 1609 kfree(tbl->phash_buckets);
1509 tbl->phash_buckets = NULL; 1610 tbl->phash_buckets = NULL;
@@ -1529,6 +1630,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1529 struct net_device *dev = NULL; 1630 struct net_device *dev = NULL;
1530 int err = -EINVAL; 1631 int err = -EINVAL;
1531 1632
1633 ASSERT_RTNL();
1532 if (nlmsg_len(nlh) < sizeof(*ndm)) 1634 if (nlmsg_len(nlh) < sizeof(*ndm))
1533 goto out; 1635 goto out;
1534 1636
@@ -1538,7 +1640,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1538 1640
1539 ndm = nlmsg_data(nlh); 1641 ndm = nlmsg_data(nlh);
1540 if (ndm->ndm_ifindex) { 1642 if (ndm->ndm_ifindex) {
1541 dev = dev_get_by_index(net, ndm->ndm_ifindex); 1643 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1542 if (dev == NULL) { 1644 if (dev == NULL) {
1543 err = -ENODEV; 1645 err = -ENODEV;
1544 goto out; 1646 goto out;
@@ -1554,34 +1656,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1554 read_unlock(&neigh_tbl_lock); 1656 read_unlock(&neigh_tbl_lock);
1555 1657
1556 if (nla_len(dst_attr) < tbl->key_len) 1658 if (nla_len(dst_attr) < tbl->key_len)
1557 goto out_dev_put; 1659 goto out;
1558 1660
1559 if (ndm->ndm_flags & NTF_PROXY) { 1661 if (ndm->ndm_flags & NTF_PROXY) {
1560 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); 1662 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1561 goto out_dev_put; 1663 goto out;
1562 } 1664 }
1563 1665
1564 if (dev == NULL) 1666 if (dev == NULL)
1565 goto out_dev_put; 1667 goto out;
1566 1668
1567 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); 1669 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1568 if (neigh == NULL) { 1670 if (neigh == NULL) {
1569 err = -ENOENT; 1671 err = -ENOENT;
1570 goto out_dev_put; 1672 goto out;
1571 } 1673 }
1572 1674
1573 err = neigh_update(neigh, NULL, NUD_FAILED, 1675 err = neigh_update(neigh, NULL, NUD_FAILED,
1574 NEIGH_UPDATE_F_OVERRIDE | 1676 NEIGH_UPDATE_F_OVERRIDE |
1575 NEIGH_UPDATE_F_ADMIN); 1677 NEIGH_UPDATE_F_ADMIN);
1576 neigh_release(neigh); 1678 neigh_release(neigh);
1577 goto out_dev_put; 1679 goto out;
1578 } 1680 }
1579 read_unlock(&neigh_tbl_lock); 1681 read_unlock(&neigh_tbl_lock);
1580 err = -EAFNOSUPPORT; 1682 err = -EAFNOSUPPORT;
1581 1683
1582out_dev_put:
1583 if (dev)
1584 dev_put(dev);
1585out: 1684out:
1586 return err; 1685 return err;
1587} 1686}
@@ -1595,6 +1694,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1595 struct net_device *dev = NULL; 1694 struct net_device *dev = NULL;
1596 int err; 1695 int err;
1597 1696
1697 ASSERT_RTNL();
1598 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); 1698 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1599 if (err < 0) 1699 if (err < 0)
1600 goto out; 1700 goto out;
@@ -1605,14 +1705,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1605 1705
1606 ndm = nlmsg_data(nlh); 1706 ndm = nlmsg_data(nlh);
1607 if (ndm->ndm_ifindex) { 1707 if (ndm->ndm_ifindex) {
1608 dev = dev_get_by_index(net, ndm->ndm_ifindex); 1708 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1609 if (dev == NULL) { 1709 if (dev == NULL) {
1610 err = -ENODEV; 1710 err = -ENODEV;
1611 goto out; 1711 goto out;
1612 } 1712 }
1613 1713
1614 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) 1714 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1615 goto out_dev_put; 1715 goto out;
1616 } 1716 }
1617 1717
1618 read_lock(&neigh_tbl_lock); 1718 read_lock(&neigh_tbl_lock);
@@ -1626,7 +1726,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1626 read_unlock(&neigh_tbl_lock); 1726 read_unlock(&neigh_tbl_lock);
1627 1727
1628 if (nla_len(tb[NDA_DST]) < tbl->key_len) 1728 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1629 goto out_dev_put; 1729 goto out;
1630 dst = nla_data(tb[NDA_DST]); 1730 dst = nla_data(tb[NDA_DST]);
1631 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; 1731 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1632 1732
@@ -1639,29 +1739,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1639 pn->flags = ndm->ndm_flags; 1739 pn->flags = ndm->ndm_flags;
1640 err = 0; 1740 err = 0;
1641 } 1741 }
1642 goto out_dev_put; 1742 goto out;
1643 } 1743 }
1644 1744
1645 if (dev == NULL) 1745 if (dev == NULL)
1646 goto out_dev_put; 1746 goto out;
1647 1747
1648 neigh = neigh_lookup(tbl, dst, dev); 1748 neigh = neigh_lookup(tbl, dst, dev);
1649 if (neigh == NULL) { 1749 if (neigh == NULL) {
1650 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { 1750 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1651 err = -ENOENT; 1751 err = -ENOENT;
1652 goto out_dev_put; 1752 goto out;
1653 } 1753 }
1654 1754
1655 neigh = __neigh_lookup_errno(tbl, dst, dev); 1755 neigh = __neigh_lookup_errno(tbl, dst, dev);
1656 if (IS_ERR(neigh)) { 1756 if (IS_ERR(neigh)) {
1657 err = PTR_ERR(neigh); 1757 err = PTR_ERR(neigh);
1658 goto out_dev_put; 1758 goto out;
1659 } 1759 }
1660 } else { 1760 } else {
1661 if (nlh->nlmsg_flags & NLM_F_EXCL) { 1761 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1662 err = -EEXIST; 1762 err = -EEXIST;
1663 neigh_release(neigh); 1763 neigh_release(neigh);
1664 goto out_dev_put; 1764 goto out;
1665 } 1765 }
1666 1766
1667 if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) 1767 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
@@ -1674,15 +1774,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1674 } else 1774 } else
1675 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); 1775 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1676 neigh_release(neigh); 1776 neigh_release(neigh);
1677 goto out_dev_put; 1777 goto out;
1678 } 1778 }
1679 1779
1680 read_unlock(&neigh_tbl_lock); 1780 read_unlock(&neigh_tbl_lock);
1681 err = -EAFNOSUPPORT; 1781 err = -EAFNOSUPPORT;
1682
1683out_dev_put:
1684 if (dev)
1685 dev_put(dev);
1686out: 1782out:
1687 return err; 1783 return err;
1688} 1784}
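The ASSERT_RTNL() additions and the switch from dev_get_by_index() to __dev_get_by_index() in neigh_delete()/neigh_add() belong together: rtnetlink handlers run under the RTNL mutex, which pins the device against unregistration, so the lookup need not take a reference and every out_dev_put/dev_put() unwind path collapses into the plain out: label. The resulting idiom:

	ASSERT_RTNL();				/* caller holds rtnl_lock() */

	dev = __dev_get_by_index(net, ifindex);	/* no dev_hold() taken */
	if (!dev)
		return -ENODEV;
	/* use dev freely; no dev_put() needed while RTNL is held */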
@@ -1748,18 +1844,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1748 unsigned long now = jiffies; 1844 unsigned long now = jiffies;
1749 unsigned int flush_delta = now - tbl->last_flush; 1845 unsigned int flush_delta = now - tbl->last_flush;
1750 unsigned int rand_delta = now - tbl->last_rand; 1846 unsigned int rand_delta = now - tbl->last_rand;
1751 1847 struct neigh_hash_table *nht;
1752 struct ndt_config ndc = { 1848 struct ndt_config ndc = {
1753 .ndtc_key_len = tbl->key_len, 1849 .ndtc_key_len = tbl->key_len,
1754 .ndtc_entry_size = tbl->entry_size, 1850 .ndtc_entry_size = tbl->entry_size,
1755 .ndtc_entries = atomic_read(&tbl->entries), 1851 .ndtc_entries = atomic_read(&tbl->entries),
1756 .ndtc_last_flush = jiffies_to_msecs(flush_delta), 1852 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1757 .ndtc_last_rand = jiffies_to_msecs(rand_delta), 1853 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1758 .ndtc_hash_rnd = tbl->hash_rnd,
1759 .ndtc_hash_mask = tbl->hash_mask,
1760 .ndtc_proxy_qlen = tbl->proxy_queue.qlen, 1854 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1761 }; 1855 };
1762 1856
1857 rcu_read_lock_bh();
1858 nht = rcu_dereference_bh(tbl->nht);
1859 ndc.ndtc_hash_rnd = nht->hash_rnd;
1860 ndc.ndtc_hash_mask = nht->hash_mask;
1861 rcu_read_unlock_bh();
1862
1763 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); 1863 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1764 } 1864 }
1765 1865
@@ -2056,10 +2156,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2056 2156
2057 read_lock_bh(&neigh->lock); 2157 read_lock_bh(&neigh->lock);
2058 ndm->ndm_state = neigh->nud_state; 2158 ndm->ndm_state = neigh->nud_state;
2059 if ((neigh->nud_state & NUD_VALID) && 2159 if (neigh->nud_state & NUD_VALID) {
2060 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { 2160 char haddr[MAX_ADDR_LEN];
2061 read_unlock_bh(&neigh->lock); 2161
2062 goto nla_put_failure; 2162 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164 read_unlock_bh(&neigh->lock);
2165 goto nla_put_failure;
2166 }
2063 } 2167 }
2064 2168
2065 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2169 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
@@ -2087,18 +2191,23 @@ static void neigh_update_notify(struct neighbour *neigh)
2087static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2191static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2088 struct netlink_callback *cb) 2192 struct netlink_callback *cb)
2089{ 2193{
2090 struct net * net = sock_net(skb->sk); 2194 struct net *net = sock_net(skb->sk);
2091 struct neighbour *n; 2195 struct neighbour *n;
2092 int rc, h, s_h = cb->args[1]; 2196 int rc, h, s_h = cb->args[1];
2093 int idx, s_idx = idx = cb->args[2]; 2197 int idx, s_idx = idx = cb->args[2];
2198 struct neigh_hash_table *nht;
2094 2199
2095 read_lock_bh(&tbl->lock); 2200 rcu_read_lock_bh();
2096 for (h = 0; h <= tbl->hash_mask; h++) { 2201 nht = rcu_dereference_bh(tbl->nht);
2202
2203 for (h = 0; h <= nht->hash_mask; h++) {
2097 if (h < s_h) 2204 if (h < s_h)
2098 continue; 2205 continue;
2099 if (h > s_h) 2206 if (h > s_h)
2100 s_idx = 0; 2207 s_idx = 0;
2101 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { 2208 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2209 n != NULL;
2210 n = rcu_dereference_bh(n->next)) {
2102 if (!net_eq(dev_net(n->dev), net)) 2211 if (!net_eq(dev_net(n->dev), net))
2103 continue; 2212 continue;
2104 if (idx < s_idx) 2213 if (idx < s_idx)
@@ -2107,17 +2216,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2107 cb->nlh->nlmsg_seq, 2216 cb->nlh->nlmsg_seq,
2108 RTM_NEWNEIGH, 2217 RTM_NEWNEIGH,
2109 NLM_F_MULTI) <= 0) { 2218 NLM_F_MULTI) <= 0) {
2110 read_unlock_bh(&tbl->lock);
2111 rc = -1; 2219 rc = -1;
2112 goto out; 2220 goto out;
2113 } 2221 }
2114 next: 2222next:
2115 idx++; 2223 idx++;
2116 } 2224 }
2117 } 2225 }
2118 read_unlock_bh(&tbl->lock);
2119 rc = skb->len; 2226 rc = skb->len;
2120out: 2227out:
2228 rcu_read_unlock_bh();
2121 cb->args[1] = h; 2229 cb->args[1] = h;
2122 cb->args[2] = idx; 2230 cb->args[2] = idx;
2123 return rc; 2231 return rc;
@@ -2150,15 +2258,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2150void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) 2258void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2151{ 2259{
2152 int chain; 2260 int chain;
2261 struct neigh_hash_table *nht;
2153 2262
2154 read_lock_bh(&tbl->lock); 2263 rcu_read_lock_bh();
2155 for (chain = 0; chain <= tbl->hash_mask; chain++) { 2264 nht = rcu_dereference_bh(tbl->nht);
2265
2266 read_lock(&tbl->lock); /* avoid resizes */
2267 for (chain = 0; chain <= nht->hash_mask; chain++) {
2156 struct neighbour *n; 2268 struct neighbour *n;
2157 2269
2158 for (n = tbl->hash_buckets[chain]; n; n = n->next) 2270 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2271 n != NULL;
2272 n = rcu_dereference_bh(n->next))
2159 cb(n, cookie); 2273 cb(n, cookie);
2160 } 2274 }
2161 read_unlock_bh(&tbl->lock); 2275 read_unlock(&tbl->lock);
2276 rcu_read_unlock_bh();
2162} 2277}
2163EXPORT_SYMBOL(neigh_for_each); 2278EXPORT_SYMBOL(neigh_for_each);
2164 2279
@@ -2167,18 +2282,25 @@ void __neigh_for_each_release(struct neigh_table *tbl,
2167 int (*cb)(struct neighbour *)) 2282 int (*cb)(struct neighbour *))
2168{ 2283{
2169 int chain; 2284 int chain;
2285 struct neigh_hash_table *nht;
2170 2286
2171 for (chain = 0; chain <= tbl->hash_mask; chain++) { 2287 nht = rcu_dereference_protected(tbl->nht,
2172 struct neighbour *n, **np; 2288 lockdep_is_held(&tbl->lock));
2289 for (chain = 0; chain <= nht->hash_mask; chain++) {
2290 struct neighbour *n;
2291 struct neighbour __rcu **np;
2173 2292
2174 np = &tbl->hash_buckets[chain]; 2293 np = &nht->hash_buckets[chain];
2175 while ((n = *np) != NULL) { 2294 while ((n = rcu_dereference_protected(*np,
2295 lockdep_is_held(&tbl->lock))) != NULL) {
2176 int release; 2296 int release;
2177 2297
2178 write_lock(&n->lock); 2298 write_lock(&n->lock);
2179 release = cb(n); 2299 release = cb(n);
2180 if (release) { 2300 if (release) {
2181 *np = n->next; 2301 rcu_assign_pointer(*np,
2302 rcu_dereference_protected(n->next,
2303 lockdep_is_held(&tbl->lock)));
2182 n->dead = 1; 2304 n->dead = 1;
2183 } else 2305 } else
2184 np = &n->next; 2306 np = &n->next;
@@ -2196,13 +2318,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
2196{ 2318{
2197 struct neigh_seq_state *state = seq->private; 2319 struct neigh_seq_state *state = seq->private;
2198 struct net *net = seq_file_net(seq); 2320 struct net *net = seq_file_net(seq);
2199 struct neigh_table *tbl = state->tbl; 2321 struct neigh_hash_table *nht = state->nht;
2200 struct neighbour *n = NULL; 2322 struct neighbour *n = NULL;
2201 int bucket = state->bucket; 2323 int bucket = state->bucket;
2202 2324
2203 state->flags &= ~NEIGH_SEQ_IS_PNEIGH; 2325 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2204 for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { 2326 for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
2205 n = tbl->hash_buckets[bucket]; 2327 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2206 2328
2207 while (n) { 2329 while (n) {
2208 if (!net_eq(dev_net(n->dev), net)) 2330 if (!net_eq(dev_net(n->dev), net))
@@ -2219,8 +2341,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
2219 break; 2341 break;
2220 if (n->nud_state & ~NUD_NOARP) 2342 if (n->nud_state & ~NUD_NOARP)
2221 break; 2343 break;
2222 next: 2344next:
2223 n = n->next; 2345 n = rcu_dereference_bh(n->next);
2224 } 2346 }
2225 2347
2226 if (n) 2348 if (n)
@@ -2237,14 +2359,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2237{ 2359{
2238 struct neigh_seq_state *state = seq->private; 2360 struct neigh_seq_state *state = seq->private;
2239 struct net *net = seq_file_net(seq); 2361 struct net *net = seq_file_net(seq);
2240 struct neigh_table *tbl = state->tbl; 2362 struct neigh_hash_table *nht = state->nht;
2241 2363
2242 if (state->neigh_sub_iter) { 2364 if (state->neigh_sub_iter) {
2243 void *v = state->neigh_sub_iter(state, n, pos); 2365 void *v = state->neigh_sub_iter(state, n, pos);
2244 if (v) 2366 if (v)
2245 return n; 2367 return n;
2246 } 2368 }
2247 n = n->next; 2369 n = rcu_dereference_bh(n->next);
2248 2370
2249 while (1) { 2371 while (1) {
2250 while (n) { 2372 while (n) {
@@ -2261,17 +2383,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2261 2383
2262 if (n->nud_state & ~NUD_NOARP) 2384 if (n->nud_state & ~NUD_NOARP)
2263 break; 2385 break;
2264 next: 2386next:
2265 n = n->next; 2387 n = rcu_dereference_bh(n->next);
2266 } 2388 }
2267 2389
2268 if (n) 2390 if (n)
2269 break; 2391 break;
2270 2392
2271 if (++state->bucket > tbl->hash_mask) 2393 if (++state->bucket > nht->hash_mask)
2272 break; 2394 break;
2273 2395
2274 n = tbl->hash_buckets[state->bucket]; 2396 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2275 } 2397 }
2276 2398
2277 if (n && pos) 2399 if (n && pos)
@@ -2369,7 +2491,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2369} 2491}
2370 2492
2371void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 2493void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2372 __acquires(tbl->lock) 2494 __acquires(rcu_bh)
2373{ 2495{
2374 struct neigh_seq_state *state = seq->private; 2496 struct neigh_seq_state *state = seq->private;
2375 2497
@@ -2377,7 +2499,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
2377 state->bucket = 0; 2499 state->bucket = 0;
2378 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); 2500 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2379 2501
2380 read_lock_bh(&tbl->lock); 2502 rcu_read_lock_bh();
2503 state->nht = rcu_dereference_bh(tbl->nht);
2381 2504
2382 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; 2505 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2383} 2506}
@@ -2411,12 +2534,9 @@ out:
2411EXPORT_SYMBOL(neigh_seq_next); 2534EXPORT_SYMBOL(neigh_seq_next);
2412 2535
2413void neigh_seq_stop(struct seq_file *seq, void *v) 2536void neigh_seq_stop(struct seq_file *seq, void *v)
2414 __releases(tbl->lock) 2537 __releases(rcu_bh)
2415{ 2538{
2416 struct neigh_seq_state *state = seq->private; 2539 rcu_read_unlock_bh();
2417 struct neigh_table *tbl = state->tbl;
2418
2419 read_unlock_bh(&tbl->lock);
2420} 2540}
2421EXPORT_SYMBOL(neigh_seq_stop); 2541EXPORT_SYMBOL(neigh_seq_stop);
2422 2542
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index af4dfbadf2a0..a5ff5a89f376 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -515,7 +515,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
515 return attribute->store(queue, attribute, buf, count); 515 return attribute->store(queue, attribute, buf, count);
516} 516}
517 517
518static struct sysfs_ops rx_queue_sysfs_ops = { 518static const struct sysfs_ops rx_queue_sysfs_ops = {
519 .show = rx_queue_attr_show, 519 .show = rx_queue_attr_show,
520 .store = rx_queue_attr_store, 520 .store = rx_queue_attr_store,
521}; 521};
@@ -598,7 +598,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
598 } 598 }
599 599
600 spin_lock(&rps_map_lock); 600 spin_lock(&rps_map_lock);
601 old_map = queue->rps_map; 601 old_map = rcu_dereference_protected(queue->rps_map,
602 lockdep_is_held(&rps_map_lock));
602 rcu_assign_pointer(queue->rps_map, map); 603 rcu_assign_pointer(queue->rps_map, map);
603 spin_unlock(&rps_map_lock); 604 spin_unlock(&rps_map_lock);
604 605
@@ -677,7 +678,8 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
677 table = NULL; 678 table = NULL;
678 679
679 spin_lock(&rps_dev_flow_lock); 680 spin_lock(&rps_dev_flow_lock);
680 old_table = queue->rps_flow_table; 681 old_table = rcu_dereference_protected(queue->rps_flow_table,
682 lockdep_is_held(&rps_dev_flow_lock));
681 rcu_assign_pointer(queue->rps_flow_table, table); 683 rcu_assign_pointer(queue->rps_flow_table, table);
682 spin_unlock(&rps_dev_flow_lock); 684 spin_unlock(&rps_dev_flow_lock);
683 685
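The rps_map/rps_flow_table stores follow the same update discipline as the neighbour table: fetch the old pointer with rcu_dereference_protected(), which documents, and lets lockdep verify, that the spinlock is what makes the plain read legal, then publish the replacement. Generic shape (q->map and map_free_rcu() are illustrative names):

	spin_lock(&lock);
	old = rcu_dereference_protected(q->map,	/* updater holds lock */
					lockdep_is_held(&lock));
	rcu_assign_pointer(q->map, new);	/* publish replacement */
	spin_unlock(&lock);

	if (old)
		call_rcu(&old->rcu, map_free_rcu); /* free after readers */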
@@ -705,13 +707,17 @@ static void rx_queue_release(struct kobject *kobj)
705{ 707{
706 struct netdev_rx_queue *queue = to_rx_queue(kobj); 708 struct netdev_rx_queue *queue = to_rx_queue(kobj);
707 struct netdev_rx_queue *first = queue->first; 709 struct netdev_rx_queue *first = queue->first;
710 struct rps_map *map;
711 struct rps_dev_flow_table *flow_table;
708 712
709 if (queue->rps_map)
710 call_rcu(&queue->rps_map->rcu, rps_map_release);
711 713
712 if (queue->rps_flow_table) 714 map = rcu_dereference_raw(queue->rps_map);
713 call_rcu(&queue->rps_flow_table->rcu, 715 if (map)
714 rps_dev_flow_table_release); 716 call_rcu(&map->rcu, rps_map_release);
717
718 flow_table = rcu_dereference_raw(queue->rps_flow_table);
719 if (flow_table)
720 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
715 721
716 if (atomic_dec_and_test(&first->count)) 722 if (atomic_dec_and_test(&first->count))
717 kfree(first); 723 kfree(first);
@@ -726,6 +732,7 @@ static struct kobj_type rx_queue_ktype = {
726static int rx_queue_add_kobject(struct net_device *net, int index) 732static int rx_queue_add_kobject(struct net_device *net, int index)
727{ 733{
728 struct netdev_rx_queue *queue = net->_rx + index; 734 struct netdev_rx_queue *queue = net->_rx + index;
735 struct netdev_rx_queue *first = queue->first;
729 struct kobject *kobj = &queue->kobj; 736 struct kobject *kobj = &queue->kobj;
730 int error = 0; 737 int error = 0;
731 738
@@ -738,38 +745,43 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
738 } 745 }
739 746
740 kobject_uevent(kobj, KOBJ_ADD); 747 kobject_uevent(kobj, KOBJ_ADD);
748 atomic_inc(&first->count);
741 749
742 return error; 750 return error;
743} 751}
744 752
745static int rx_queue_register_kobjects(struct net_device *net) 753int
754net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
746{ 755{
747 int i; 756 int i;
748 int error = 0; 757 int error = 0;
749 758
750 net->queues_kset = kset_create_and_add("queues", 759 for (i = old_num; i < new_num; i++) {
751 NULL, &net->dev.kobj);
752 if (!net->queues_kset)
753 return -ENOMEM;
754 for (i = 0; i < net->num_rx_queues; i++) {
755 error = rx_queue_add_kobject(net, i); 760 error = rx_queue_add_kobject(net, i);
756 if (error) 761 if (error) {
762 new_num = old_num;
757 break; 763 break;
764 }
758 } 765 }
759 766
760 if (error) 767 while (--i >= new_num)
761 while (--i >= 0) 768 kobject_put(&net->_rx[i].kobj);
762 kobject_put(&net->_rx[i].kobj);
763 769
764 return error; 770 return error;
765} 771}
766 772
767static void rx_queue_remove_kobjects(struct net_device *net) 773static int rx_queue_register_kobjects(struct net_device *net)
768{ 774{
769 int i; 775 net->queues_kset = kset_create_and_add("queues",
776 NULL, &net->dev.kobj);
777 if (!net->queues_kset)
778 return -ENOMEM;
779 return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
780}
770 781
771 for (i = 0; i < net->num_rx_queues; i++) 782static void rx_queue_remove_kobjects(struct net_device *net)
772 kobject_put(&net->_rx[i].kobj); 783{
784 net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
773 kset_unregister(net->queues_kset); 785 kset_unregister(net->queues_kset);
774} 786}
775#endif /* CONFIG_RPS */ 787#endif /* CONFIG_RPS */
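net_rx_queue_update_kobjects() folds registration, error rollback and shrinking into one range-based helper so a later change to real_num_rx_queues can reuse it. Growing registers [old_num, new_num); on error the upper bound is pulled back to old_num so the backward loop removes exactly what was added; shrinking is the same call with the bounds reversed, where the forward loop never runs. Abstracted control flow (add_one()/put_one() are placeholders):

	int i, error = 0;

	for (i = old_num; i < new_num; i++) {
		error = add_one(i);		/* register queue i */
		if (error) {
			new_num = old_num;	/* roll back our additions */
			break;
		}
	}
	while (--i >= new_num)
		put_one(i);			/* rollback or shrink */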
@@ -789,12 +801,13 @@ static const void *net_netlink_ns(struct sock *sk)
789 return sock_net(sk); 801 return sock_net(sk);
790} 802}
791 803
792static struct kobj_ns_type_operations net_ns_type_operations = { 804struct kobj_ns_type_operations net_ns_type_operations = {
793 .type = KOBJ_NS_TYPE_NET, 805 .type = KOBJ_NS_TYPE_NET,
794 .current_ns = net_current_ns, 806 .current_ns = net_current_ns,
795 .netlink_ns = net_netlink_ns, 807 .netlink_ns = net_netlink_ns,
796 .initial_ns = net_initial_ns, 808 .initial_ns = net_initial_ns,
797}; 809};
810EXPORT_SYMBOL_GPL(net_ns_type_operations);
798 811
799static void net_kobj_ns_exit(struct net *net) 812static void net_kobj_ns_exit(struct net *net)
800{ 813{
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 805555e8b187..778e1571548d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,4 +4,8 @@
4int netdev_kobject_init(void); 4int netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7#ifdef CONFIG_RPS
8int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
9#endif
10
7#endif 11#endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c988e685433a..3f860261c5ee 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -42,7 +42,9 @@ static int net_assign_generic(struct net *net, int id, void *data)
42 BUG_ON(!mutex_is_locked(&net_mutex)); 42 BUG_ON(!mutex_is_locked(&net_mutex));
43 BUG_ON(id == 0); 43 BUG_ON(id == 0);
44 44
45 ng = old_ng = net->gen; 45 old_ng = rcu_dereference_protected(net->gen,
46 lockdep_is_held(&net_mutex));
47 ng = old_ng;
46 if (old_ng->len >= id) 48 if (old_ng->len >= id)
47 goto assign; 49 goto assign;
48 50
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 537e01afd81b..4e98ffac3af0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -288,11 +288,11 @@ static int netpoll_owner_active(struct net_device *dev)
288 return 0; 288 return 0;
289} 289}
290 290
291void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) 291void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
292 struct net_device *dev)
292{ 293{
293 int status = NETDEV_TX_BUSY; 294 int status = NETDEV_TX_BUSY;
294 unsigned long tries; 295 unsigned long tries;
295 struct net_device *dev = np->dev;
296 const struct net_device_ops *ops = dev->netdev_ops; 296 const struct net_device_ops *ops = dev->netdev_ops;
297 /* It is up to the caller to keep npinfo alive. */ 297 /* It is up to the caller to keep npinfo alive. */
298 struct netpoll_info *npinfo = np->dev->npinfo; 298 struct netpoll_info *npinfo = np->dev->npinfo;
@@ -346,7 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
346 schedule_delayed_work(&npinfo->tx_work,0); 346 schedule_delayed_work(&npinfo->tx_work,0);
347 } 347 }
348} 348}
349EXPORT_SYMBOL(netpoll_send_skb); 349EXPORT_SYMBOL(netpoll_send_skb_on_dev);
350 350
351void netpoll_send_udp(struct netpoll *np, const char *msg, int len) 351void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
352{ 352{
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 10a1ea72010d..33bc3823ac6f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -729,16 +729,14 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
729 *num = 0; 729 *num = 0;
730 730
731 for (; i < maxlen; i++) { 731 for (; i < maxlen; i++) {
732 int value;
732 char c; 733 char c;
733 *num <<= 4; 734 *num <<= 4;
734 if (get_user(c, &user_buffer[i])) 735 if (get_user(c, &user_buffer[i]))
735 return -EFAULT; 736 return -EFAULT;
736 if ((c >= '0') && (c <= '9')) 737 value = hex_to_bin(c);
737 *num |= c - '0'; 738 if (value >= 0)
738 else if ((c >= 'a') && (c <= 'f')) 739 *num |= value;
739 *num |= c - 'a' + 10;
740 else if ((c >= 'A') && (c <= 'F'))
741 *num |= c - 'A' + 10;
742 else 740 else
743 break; 741 break;
744 } 742 }
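hex32_arg() now delegates digit decoding to the kernel's hex_to_bin(), which returns the digit's value (0-15) or a negative number for anything that is not a hex digit, so one sign test replaces the old three-branch ladder. A runnable userspace equivalent of the helper:

	#include <ctype.h>

	/* value of a hex digit, or -1 if 'c' is not one */
	static int hex_to_bin_like(int c)
	{
		if (c >= '0' && c <= '9')
			return c - '0';
		c = tolower(c);
		if (c >= 'a' && c <= 'f')
			return c - 'a' + 10;
		return -1;
	}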
@@ -773,10 +771,10 @@ done:
773static unsigned long num_arg(const char __user * user_buffer, 771static unsigned long num_arg(const char __user * user_buffer,
774 unsigned long maxlen, unsigned long *num) 772 unsigned long maxlen, unsigned long *num)
775{ 773{
776 int i = 0; 774 int i;
777 *num = 0; 775 *num = 0;
778 776
779 for (; i < maxlen; i++) { 777 for (i = 0; i < maxlen; i++) {
780 char c; 778 char c;
781 if (get_user(c, &user_buffer[i])) 779 if (get_user(c, &user_buffer[i]))
782 return -EFAULT; 780 return -EFAULT;
@@ -791,9 +789,9 @@ static unsigned long num_arg(const char __user * user_buffer,
791 789
792static int strn_len(const char __user * user_buffer, unsigned int maxlen) 790static int strn_len(const char __user * user_buffer, unsigned int maxlen)
793{ 791{
794 int i = 0; 792 int i;
795 793
796 for (; i < maxlen; i++) { 794 for (i = 0; i < maxlen; i++) {
797 char c; 795 char c;
798 if (get_user(c, &user_buffer[i])) 796 if (get_user(c, &user_buffer[i]))
799 return -EFAULT; 797 return -EFAULT;
@@ -848,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
848{ 846{
849 struct seq_file *seq = file->private_data; 847 struct seq_file *seq = file->private_data;
850 struct pktgen_dev *pkt_dev = seq->private; 848 struct pktgen_dev *pkt_dev = seq->private;
851 int i = 0, max, len; 849 int i, max, len;
852 char name[16], valstr[32]; 850 char name[16], valstr[32];
853 unsigned long value = 0; 851 unsigned long value = 0;
854 char *pg_result = NULL; 852 char *pg_result = NULL;
@@ -862,13 +860,13 @@ static ssize_t pktgen_if_write(struct file *file,
862 return -EINVAL; 860 return -EINVAL;
863 } 861 }
864 862
865 max = count - i; 863 max = count;
866 tmp = count_trail_chars(&user_buffer[i], max); 864 tmp = count_trail_chars(user_buffer, max);
867 if (tmp < 0) { 865 if (tmp < 0) {
868 pr_warning("illegal format\n"); 866 pr_warning("illegal format\n");
869 return tmp; 867 return tmp;
870 } 868 }
871 i += tmp; 869 i = tmp;
872 870
873 /* Read variable name */ 871 /* Read variable name */
874 872
@@ -889,10 +887,11 @@ static ssize_t pktgen_if_write(struct file *file,
889 i += len; 887 i += len;
890 888
891 if (debug) { 889 if (debug) {
892 char tb[count + 1]; 890 size_t copy = min_t(size_t, count, 1023);
893 if (copy_from_user(tb, user_buffer, count)) 891 char tb[copy + 1];
892 if (copy_from_user(tb, user_buffer, copy))
894 return -EFAULT; 893 return -EFAULT;
895 tb[count] = 0; 894 tb[copy] = 0;
896 printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name, 895 printk(KERN_DEBUG "pktgen: %s,%lu buffer -:%s:-\n", name,
897 (unsigned long)count, tb); 896 (unsigned long)count, tb);
898 } 897 }
@@ -1766,7 +1765,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1766{ 1765{
1767 struct seq_file *seq = file->private_data; 1766 struct seq_file *seq = file->private_data;
1768 struct pktgen_thread *t = seq->private; 1767 struct pktgen_thread *t = seq->private;
1769 int i = 0, max, len, ret; 1768 int i, max, len, ret;
1770 char name[40]; 1769 char name[40];
1771 char *pg_result; 1770 char *pg_result;
1772 1771
@@ -1775,12 +1774,12 @@ static ssize_t pktgen_thread_write(struct file *file,
1775 return -EINVAL; 1774 return -EINVAL;
1776 } 1775 }
1777 1776
1778 max = count - i; 1777 max = count;
1779 len = count_trail_chars(&user_buffer[i], max); 1778 len = count_trail_chars(user_buffer, max);
1780 if (len < 0) 1779 if (len < 0)
1781 return len; 1780 return len;
1782 1781
1783 i += len; 1782 i = len;
1784 1783
1785 /* Read variable name */ 1784 /* Read variable name */
1786 1785
@@ -1977,7 +1976,7 @@ static struct net_device *pktgen_dev_get_by_name(struct pktgen_dev *pkt_dev,
1977 const char *ifname) 1976 const char *ifname)
1978{ 1977{
1979 char b[IFNAMSIZ+5]; 1978 char b[IFNAMSIZ+5];
1980 int i = 0; 1979 int i;
1981 1980
1982 for (i = 0; ifname[i] != '@'; i++) { 1981 for (i = 0; ifname[i] != '@'; i++) {
1983 if (i == IFNAMSIZ) 1982 if (i == IFNAMSIZ)
@@ -2521,8 +2520,8 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
2521{ 2520{
2522 if (pkt_dev->cflows) { 2521 if (pkt_dev->cflows) {
2523 /* let go of the SAs if we have them */ 2522 /* let go of the SAs if we have them */
2524 int i = 0; 2523 int i;
2525 for (; i < pkt_dev->cflows; i++) { 2524 for (i = 0; i < pkt_dev->cflows; i++) {
2526 struct xfrm_state *x = pkt_dev->flows[i].x; 2525 struct xfrm_state *x = pkt_dev->flows[i].x;
2527 if (x) { 2526 if (x) {
2528 xfrm_state_put(x); 2527 xfrm_state_put(x);
@@ -2613,8 +2612,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2613 /* Update any of the values, used when we're incrementing various 2612 /* Update any of the values, used when we're incrementing various
2614 * fields. 2613 * fields.
2615 */ 2614 */
2616 queue_map = pkt_dev->cur_queue_map;
2617 mod_cur_headers(pkt_dev); 2615 mod_cur_headers(pkt_dev);
2616 queue_map = pkt_dev->cur_queue_map;
2618 2617
2619 datalen = (odev->hard_header_len + 16) & ~0xf; 2618 datalen = (odev->hard_header_len + 16) & ~0xf;
2620 2619
@@ -2977,8 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2977 /* Update any of the values, used when we're incrementing various 2976 /* Update any of the values, used when we're incrementing various
2978 * fields. 2977 * fields.
2979 */ 2978 */
2980 queue_map = pkt_dev->cur_queue_map;
2981 mod_cur_headers(pkt_dev); 2979 mod_cur_headers(pkt_dev);
2980 queue_map = pkt_dev->cur_queue_map;
2982 2981
2983 skb = __netdev_alloc_skb(odev, 2982 skb = __netdev_alloc_skb(odev,
2984 pkt_dev->cur_pkt_size + 64 2983 pkt_dev->cur_pkt_size + 64
@@ -3907,8 +3906,6 @@ static void __exit pg_cleanup(void)
3907{ 3906{
3908 struct pktgen_thread *t; 3907 struct pktgen_thread *t;
3909 struct list_head *q, *n; 3908 struct list_head *q, *n;
3910 wait_queue_head_t queue;
3911 init_waitqueue_head(&queue);
3912 3909
3913 /* Stop all interfaces & threads */ 3910 /* Stop all interfaces & threads */
3914 3911
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f78d821bd935..841c287ef40a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -299,14 +299,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
299 unregister_netdevice_many(&list_kill); 299 unregister_netdevice_many(&list_kill);
300} 300}
301 301
302void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
303{
304 rtnl_lock();
305 __rtnl_kill_links(net, ops);
306 rtnl_unlock();
307}
308EXPORT_SYMBOL_GPL(rtnl_kill_links);
309
310/** 302/**
311 * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. 303 * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
312 * @ops: struct rtnl_link_ops * to unregister 304 * @ops: struct rtnl_link_ops * to unregister
@@ -355,16 +347,17 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
355 if (!ops) 347 if (!ops)
356 return 0; 348 return 0;
357 349
358 size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ 350 size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
359 nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ 351 nla_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */
360 352
361 if (ops->get_size) 353 if (ops->get_size)
362 /* IFLA_INFO_DATA + nested data */ 354 /* IFLA_INFO_DATA + nested data */
363 size += nlmsg_total_size(sizeof(struct nlattr)) + 355 size += nla_total_size(sizeof(struct nlattr)) +
364 ops->get_size(dev); 356 ops->get_size(dev);
365 357
366 if (ops->get_xstats_size) 358 if (ops->get_xstats_size)
367 size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ 359 /* IFLA_INFO_XSTATS */
360 size += nla_total_size(ops->get_xstats_size(dev));
368 361
369 return size; 362 return size;
370} 363}
@@ -612,36 +605,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
612 605
613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 606static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
614{ 607{
615 struct rtnl_link_stats64 a; 608 memcpy(v, b, sizeof(*b));
616
617 a.rx_packets = b->rx_packets;
618 a.tx_packets = b->tx_packets;
619 a.rx_bytes = b->rx_bytes;
620 a.tx_bytes = b->tx_bytes;
621 a.rx_errors = b->rx_errors;
622 a.tx_errors = b->tx_errors;
623 a.rx_dropped = b->rx_dropped;
624 a.tx_dropped = b->tx_dropped;
625
626 a.multicast = b->multicast;
627 a.collisions = b->collisions;
628
629 a.rx_length_errors = b->rx_length_errors;
630 a.rx_over_errors = b->rx_over_errors;
631 a.rx_crc_errors = b->rx_crc_errors;
632 a.rx_frame_errors = b->rx_frame_errors;
633 a.rx_fifo_errors = b->rx_fifo_errors;
634 a.rx_missed_errors = b->rx_missed_errors;
635
636 a.tx_aborted_errors = b->tx_aborted_errors;
637 a.tx_carrier_errors = b->tx_carrier_errors;
638 a.tx_fifo_errors = b->tx_fifo_errors;
639 a.tx_heartbeat_errors = b->tx_heartbeat_errors;
640 a.tx_window_errors = b->tx_window_errors;
641
642 a.rx_compressed = b->rx_compressed;
643 a.tx_compressed = b->tx_compressed;
644 memcpy(v, &a, sizeof(a));
645} 609}
646 610
647/* All VF info */ 611/* All VF info */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56ba3c4e4761..104f8444754a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -202,8 +202,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
202 skb->data = data; 202 skb->data = data;
203 skb_reset_tail_pointer(skb); 203 skb_reset_tail_pointer(skb);
204 skb->end = skb->tail + size; 204 skb->end = skb->tail + size;
205 kmemcheck_annotate_bitfield(skb, flags1);
206 kmemcheck_annotate_bitfield(skb, flags2);
207#ifdef NET_SKBUFF_DATA_USES_OFFSET 205#ifdef NET_SKBUFF_DATA_USES_OFFSET
208 skb->mac_header = ~0U; 206 skb->mac_header = ~0U;
209#endif 207#endif
@@ -249,10 +247,9 @@ EXPORT_SYMBOL(__alloc_skb);
249struct sk_buff *__netdev_alloc_skb(struct net_device *dev, 247struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
250 unsigned int length, gfp_t gfp_mask) 248 unsigned int length, gfp_t gfp_mask)
251{ 249{
252 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
253 struct sk_buff *skb; 250 struct sk_buff *skb;
254 251
255 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); 252 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
256 if (likely(skb)) { 253 if (likely(skb)) {
257 skb_reserve(skb, NET_SKB_PAD); 254 skb_reserve(skb, NET_SKB_PAD);
258 skb->dev = dev; 255 skb->dev = dev;
@@ -261,16 +258,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
261} 258}
262EXPORT_SYMBOL(__netdev_alloc_skb); 259EXPORT_SYMBOL(__netdev_alloc_skb);
263 260
264struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
265{
266 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
267 struct page *page;
268
269 page = alloc_pages_node(node, gfp_mask, 0);
270 return page;
271}
272EXPORT_SYMBOL(__netdev_alloc_page);
273
274void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, 261void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
275 int size) 262 int size)
276{ 263{
@@ -340,7 +327,7 @@ static void skb_release_data(struct sk_buff *skb)
340 put_page(skb_shinfo(skb)->frags[i].page); 327 put_page(skb_shinfo(skb)->frags[i].page);
341 } 328 }
342 329
343 if (skb_has_frags(skb)) 330 if (skb_has_frag_list(skb))
344 skb_drop_fraglist(skb); 331 skb_drop_fraglist(skb);
345 332
346 kfree(skb->head); 333 kfree(skb->head);
@@ -686,16 +673,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
686 673
687struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) 674struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
688{ 675{
689 int headerlen = skb->data - skb->head; 676 int headerlen = skb_headroom(skb);
690 /* 677 unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
691 * Allocate the copy buffer 678 struct sk_buff *n = alloc_skb(size, gfp_mask);
692 */ 679
693 struct sk_buff *n;
694#ifdef NET_SKBUFF_DATA_USES_OFFSET
695 n = alloc_skb(skb->end + skb->data_len, gfp_mask);
696#else
697 n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
698#endif
699 if (!n) 680 if (!n)
700 return NULL; 681 return NULL;
701 682
@@ -727,20 +708,14 @@ EXPORT_SYMBOL(skb_copy);
727 708
728struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 709struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
729{ 710{
730 /* 711 unsigned int size = skb_end_pointer(skb) - skb->head;
731 * Allocate the copy buffer 712 struct sk_buff *n = alloc_skb(size, gfp_mask);
732 */ 713
733 struct sk_buff *n;
734#ifdef NET_SKBUFF_DATA_USES_OFFSET
735 n = alloc_skb(skb->end, gfp_mask);
736#else
737 n = alloc_skb(skb->end - skb->head, gfp_mask);
738#endif
739 if (!n) 714 if (!n)
740 goto out; 715 goto out;
741 716
742 /* Set the data pointer */ 717 /* Set the data pointer */
743 skb_reserve(n, skb->data - skb->head); 718 skb_reserve(n, skb_headroom(skb));
744 /* Set the tail pointer and length */ 719 /* Set the tail pointer and length */
745 skb_put(n, skb_headlen(skb)); 720 skb_put(n, skb_headlen(skb));
746 /* Copy the bytes */ 721 /* Copy the bytes */
@@ -760,7 +735,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
760 skb_shinfo(n)->nr_frags = i; 735 skb_shinfo(n)->nr_frags = i;
761 } 736 }
762 737
763 if (skb_has_frags(skb)) { 738 if (skb_has_frag_list(skb)) {
764 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 739 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
765 skb_clone_fraglist(n); 740 skb_clone_fraglist(n);
766 } 741 }
@@ -792,12 +767,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
792{ 767{
793 int i; 768 int i;
794 u8 *data; 769 u8 *data;
795#ifdef NET_SKBUFF_DATA_USES_OFFSET 770 int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
796 int size = nhead + skb->end + ntail;
797#else
798 int size = nhead + (skb->end - skb->head) + ntail;
799#endif
800 long off; 771 long off;
772 bool fastpath;
801 773
802 BUG_ON(nhead < 0); 774 BUG_ON(nhead < 0);
803 775
@@ -811,23 +783,36 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
811 goto nodata; 783 goto nodata;
812 784
813 /* Copy only real data... and, alas, header. This should be 785 /* Copy only real data... and, alas, header. This should be
814 * optimized for the cases when header is void. */ 786 * optimized for the cases when header is void.
815#ifdef NET_SKBUFF_DATA_USES_OFFSET 787 */
816 memcpy(data + nhead, skb->head, skb->tail); 788 memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
817#else 789
818 memcpy(data + nhead, skb->head, skb->tail - skb->head); 790 memcpy((struct skb_shared_info *)(data + size),
819#endif 791 skb_shinfo(skb),
820 memcpy(data + size, skb_end_pointer(skb),
821 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); 792 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
822 793
823 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 794 /* Check if we can avoid taking references on fragments if we own
824 get_page(skb_shinfo(skb)->frags[i].page); 795 * the last reference on skb->head. (see skb_release_data())
796 */
797 if (!skb->cloned)
798 fastpath = true;
799 else {
800 int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
825 801
826 if (skb_has_frags(skb)) 802 fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
827 skb_clone_fraglist(skb); 803 }
828 804
829 skb_release_data(skb); 805 if (fastpath) {
806 kfree(skb->head);
807 } else {
808 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
809 get_page(skb_shinfo(skb)->frags[i].page);
830 810
811 if (skb_has_frag_list(skb))
812 skb_clone_fraglist(skb);
813
814 skb_release_data(skb);
815 }
831 off = (data + nhead) - skb->head; 816 off = (data + nhead) - skb->head;
832 817
833 skb->head = data; 818 skb->head = data;
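
The new fastpath avoids a get_page()/put_page() cycle on every fragment when the caller holds the only reference to skb->head: skb_shinfo(skb)->dataref packs two 16-bit counters (lower half: users of the whole buffer, upper half: payload-only users), so exclusive ownership can be tested directly. A sketch restating the test used above, under those documented semantics:

	/* true iff nobody else can see skb->head */
	static bool head_is_exclusive(const struct sk_buff *skb)
	{
		int delta;

		if (!skb->cloned)
			return true;	/* head was never shared */
		/* a no-header clone pins one ref in each half of dataref */
		delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
		return atomic_read(&skb_shinfo(skb)->dataref) == delta;
	}

When the test succeeds, the old head is simply kfree()d and all page and frag-list references migrate to the new buffer untouched.
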
@@ -1100,7 +1085,7 @@ drop_pages:
1100 for (; i < nfrags; i++) 1085 for (; i < nfrags; i++)
1101 put_page(skb_shinfo(skb)->frags[i].page); 1086 put_page(skb_shinfo(skb)->frags[i].page);
1102 1087
1103 if (skb_has_frags(skb)) 1088 if (skb_has_frag_list(skb))
1104 skb_drop_fraglist(skb); 1089 skb_drop_fraglist(skb);
1105 goto done; 1090 goto done;
1106 } 1091 }
@@ -1195,7 +1180,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1195 /* Optimization: no fragments, no reasons to preestimate 1180 /* Optimization: no fragments, no reasons to preestimate
1196 * size of pulled pages. Superb. 1181 * size of pulled pages. Superb.
1197 */ 1182 */
1198 if (!skb_has_frags(skb)) 1183 if (!skb_has_frag_list(skb))
1199 goto pull_pages; 1184 goto pull_pages;
1200 1185
1201 /* Estimate size of pulled pages. */ 1186 /* Estimate size of pulled pages. */
@@ -2324,7 +2309,7 @@ next_skb:
2324 st->frag_data = NULL; 2309 st->frag_data = NULL;
2325 } 2310 }
2326 2311
2327 if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { 2312 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
2328 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 2313 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2329 st->frag_idx = 0; 2314 st->frag_idx = 0;
2330 goto next_skb; 2315 goto next_skb;
@@ -2894,7 +2879,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2894 return -ENOMEM; 2879 return -ENOMEM;
2895 2880
2896 /* Easy case. Most of packets will go this way. */ 2881 /* Easy case. Most of packets will go this way. */
2897 if (!skb_has_frags(skb)) { 2882 if (!skb_has_frag_list(skb)) {
2898 /* A little of trouble, not enough of space for trailer. 2883 /* A little of trouble, not enough of space for trailer.
2899 * This should not happen, when stack is tuned to generate 2884 * This should not happen, when stack is tuned to generate
2900 * good frames. OK, on miss we reallocate and reserve even more 2885 * good frames. OK, on miss we reallocate and reserve even more
@@ -2929,7 +2914,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2929 2914
2930 if (skb1->next == NULL && tailbits) { 2915 if (skb1->next == NULL && tailbits) {
2931 if (skb_shinfo(skb1)->nr_frags || 2916 if (skb_shinfo(skb1)->nr_frags ||
2932 skb_has_frags(skb1) || 2917 skb_has_frag_list(skb1) ||
2933 skb_tailroom(skb1) < tailbits) 2918 skb_tailroom(skb1) < tailbits)
2934 ntail = tailbits + 128; 2919 ntail = tailbits + 128;
2935 } 2920 }
@@ -2938,7 +2923,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2938 skb_cloned(skb1) || 2923 skb_cloned(skb1) ||
2939 ntail || 2924 ntail ||
2940 skb_shinfo(skb1)->nr_frags || 2925 skb_shinfo(skb1)->nr_frags ||
2941 skb_has_frags(skb1)) { 2926 skb_has_frag_list(skb1)) {
2942 struct sk_buff *skb2; 2927 struct sk_buff *skb2;
2943 2928
2944 /* Fuck, we are miserable poor guys... */ 2929 /* Fuck, we are miserable poor guys... */
@@ -3021,7 +3006,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3021 } else { 3006 } else {
3022 /* 3007 /*
3023 * no hardware time stamps available, 3008 * no hardware time stamps available,
3024 * so keep the skb_shared_tx and only 3009 * so keep the shared tx_flags and only
3025 * store software time stamp 3010 * store software time stamp
3026 */ 3011 */
3027 skb->tstamp = ktime_get_real(); 3012 skb->tstamp = ktime_get_real();
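
The comment tweak reflects that the separate struct skb_shared_tx was folded into a tx_flags byte inside struct skb_shared_info. A hedged reconstruction of the resulting flag bits (names from include/linux/skbuff.h of this era; exact values are from memory):

	enum {
		SKBTX_HW_TSTAMP   = 1 << 0,	/* generate hardware timestamp */
		SKBTX_SW_TSTAMP   = 1 << 1,	/* generate software timestamp */
		SKBTX_IN_PROGRESS = 1 << 2,	/* device driver will supply
						 * the hardware timestamp    */
	};
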
diff --git a/net/core/sock.c b/net/core/sock.c
index 7d99e13148e6..fb6080111461 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1225,7 +1225,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1225 sock_reset_flag(newsk, SOCK_DONE); 1225 sock_reset_flag(newsk, SOCK_DONE);
1226 skb_queue_head_init(&newsk->sk_error_queue); 1226 skb_queue_head_init(&newsk->sk_error_queue);
1227 1227
1228 filter = newsk->sk_filter; 1228 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1229 if (filter != NULL) 1229 if (filter != NULL)
1230 sk_filter_charge(newsk, filter); 1230 sk_filter_charge(newsk, filter);
1231 1231
@@ -1560,6 +1560,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1560EXPORT_SYMBOL(sock_alloc_send_skb); 1560EXPORT_SYMBOL(sock_alloc_send_skb);
1561 1561
1562static void __lock_sock(struct sock *sk) 1562static void __lock_sock(struct sock *sk)
1563 __releases(&sk->sk_lock.slock)
1564 __acquires(&sk->sk_lock.slock)
1563{ 1565{
1564 DEFINE_WAIT(wait); 1566 DEFINE_WAIT(wait);
1565 1567
@@ -1576,6 +1578,8 @@ static void __lock_sock(struct sock *sk)
1576} 1578}
1577 1579
1578static void __release_sock(struct sock *sk) 1580static void __release_sock(struct sock *sk)
1581 __releases(&sk->sk_lock.slock)
1582 __acquires(&sk->sk_lock.slock)
1579{ 1583{
1580 struct sk_buff *skb = sk->sk_backlog.head; 1584 struct sk_buff *skb = sk->sk_backlog.head;
1581 1585
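
__releases()/__acquires() are sparse annotations (see <linux/compiler.h>): under sparse context checking they declare that the function is entered with sk_lock.slock held, drops it internally, and re-takes it before returning, so the checker does not flag the apparent lock imbalance. A hedged sketch of the pattern on a made-up helper:

	/* drops the lock around blocking work, as sparse is now told */
	static void wait_unlocked(struct sock *sk)
		__releases(&sk->sk_lock.slock)
		__acquires(&sk->sk_lock.slock)
	{
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();		/* any blocking work */
		spin_lock_bh(&sk->sk_lock.slock);
	}
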
@@ -1649,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
1649{ 1653{
1650 struct proto *prot = sk->sk_prot; 1654 struct proto *prot = sk->sk_prot;
1651 int amt = sk_mem_pages(size); 1655 int amt = sk_mem_pages(size);
1652 int allocated; 1656 long allocated;
1653 1657
1654 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; 1658 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1655 allocated = atomic_add_return(amt, prot->memory_allocated); 1659 allocated = atomic_long_add_return(amt, prot->memory_allocated);
1656 1660
1657 /* Under limit. */ 1661 /* Under limit. */
1658 if (allocated <= prot->sysctl_mem[0]) { 1662 if (allocated <= prot->sysctl_mem[0]) {
@@ -1710,7 +1714,7 @@ suppress_allocation:
1710 1714
1711 /* Alas. Undo changes. */ 1715 /* Alas. Undo changes. */
1712 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; 1716 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1713 atomic_sub(amt, prot->memory_allocated); 1717 atomic_long_sub(amt, prot->memory_allocated);
1714 return 0; 1718 return 0;
1715} 1719}
1716EXPORT_SYMBOL(__sk_mem_schedule); 1720EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1723,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
1723{ 1727{
1724 struct proto *prot = sk->sk_prot; 1728 struct proto *prot = sk->sk_prot;
1725 1729
1726 atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 1730 atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
1727 prot->memory_allocated); 1731 prot->memory_allocated);
1728 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; 1732 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1729 1733
1730 if (prot->memory_pressure && *prot->memory_pressure && 1734 if (prot->memory_pressure && *prot->memory_pressure &&
1731 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) 1735 (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1732 *prot->memory_pressure = 0; 1736 *prot->memory_pressure = 0;
1733} 1737}
1734EXPORT_SYMBOL(__sk_mem_reclaim); 1738EXPORT_SYMBOL(__sk_mem_reclaim);
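
Switching memory_allocated (and the sysctl_mem limits it is compared against) from atomic_t to atomic_long_t is presumably about headroom on very large machines: both are denominated in SK_MEM_QUANTUM (page-sized, 4 KiB) units, so

	16 TiB / 4 KiB = 2^44 / 2^12 = 2^32 pages  >  INT_MAX = 2^31 - 1

and a 32-bit counter or auto-tuned limit can already overflow on a multi-terabyte host. On 64-bit kernels atomic_long_t gives a 64-bit counter at no extra cost.
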
@@ -2448,12 +2452,12 @@ static char proto_method_implemented(const void *method)
2448 2452
2449static void proto_seq_printf(struct seq_file *seq, struct proto *proto) 2453static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2450{ 2454{
2451 seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s " 2455 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2452 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 2456 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2453 proto->name, 2457 proto->name,
2454 proto->obj_size, 2458 proto->obj_size,
2455 sock_prot_inuse_get(seq_file_net(seq), proto), 2459 sock_prot_inuse_get(seq_file_net(seq), proto),
2456 proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, 2460 proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
2457 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", 2461 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
2458 proto->max_header, 2462 proto->max_header,
2459 proto->slab == NULL ? "no" : "yes", 2463 proto->slab == NULL ? "no" : "yes",
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 01eee5d984be..385b6095fdc4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,7 +34,8 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
34 34
35 mutex_lock(&sock_flow_mutex); 35 mutex_lock(&sock_flow_mutex);
36 36
37 orig_sock_table = rps_sock_flow_table; 37 orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
38 lockdep_is_held(&sock_flow_mutex));
38 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; 39 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
39 40
40 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 41 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
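
rcu_dereference_protected() is the update-side counterpart of rcu_dereference(): it skips the read-side ordering but lets lockdep verify the stated protection, here that sock_flow_mutex is held (the sk_clone() hunk earlier passes the constant 1 because the fresh socket is not yet visible to any other context). A hedged sketch of the general pattern, with hypothetical names:

	static DEFINE_MUTEX(my_mutex);
	static struct foo __rcu *global_foo;

	static void replace_foo(struct foo *newp)
	{
		struct foo *old;

		mutex_lock(&my_mutex);
		/* lockdep complains if my_mutex is not actually held */
		old = rcu_dereference_protected(global_foo,
						lockdep_is_held(&my_mutex));
		rcu_assign_pointer(global_foo, newp);
		mutex_unlock(&my_mutex);
		if (old) {
			synchronize_rcu();	/* wait out readers */
			kfree(old);
		}
	}
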
diff --git a/net/core/utils.c b/net/core/utils.c
index f41854470539..5fea0ab21902 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -75,7 +75,7 @@ __be32 in_aton(const char *str)
75 str++; 75 str++;
76 } 76 }
77 } 77 }
78 return(htonl(l)); 78 return htonl(l);
79} 79}
80EXPORT_SYMBOL(in_aton); 80EXPORT_SYMBOL(in_aton);
81 81
@@ -92,18 +92,19 @@ EXPORT_SYMBOL(in_aton);
92 92
93static inline int xdigit2bin(char c, int delim) 93static inline int xdigit2bin(char c, int delim)
94{ 94{
95 int val;
96
95 if (c == delim || c == '\0') 97 if (c == delim || c == '\0')
96 return IN6PTON_DELIM; 98 return IN6PTON_DELIM;
97 if (c == ':') 99 if (c == ':')
98 return IN6PTON_COLON_MASK; 100 return IN6PTON_COLON_MASK;
99 if (c == '.') 101 if (c == '.')
100 return IN6PTON_DOT; 102 return IN6PTON_DOT;
101 if (c >= '0' && c <= '9') 103
102 return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); 104 val = hex_to_bin(c);
103 if (c >= 'a' && c <= 'f') 105 if (val >= 0)
104 return (IN6PTON_XDIGIT | (c - 'a' + 10)); 106 return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
105 if (c >= 'A' && c <= 'F') 107
106 return (IN6PTON_XDIGIT | (c - 'A' + 10));
107 if (delim == -1) 108 if (delim == -1)
108 return IN6PTON_DELIM; 109 return IN6PTON_DELIM;
109 return IN6PTON_UNKNOWN; 110 return IN6PTON_UNKNOWN;
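
The three open-coded character ranges collapse into lib/hexdump.c's hex_to_bin(), which returns a digit's value 0..15 or -1; the "val < 10 ? IN6PTON_DIGIT : 0" term preserves the old behaviour that only '0'-'9' additionally carry the decimal-digit flag. For reference, hex_to_bin() is essentially:

	/* lib/hexdump.c (paraphrased) */
	int hex_to_bin(char ch)
	{
		if (ch >= '0' && ch <= '9')
			return ch - '0';
		ch = tolower(ch);
		if (ch >= 'a' && ch <= 'f')
			return ch - 'a' + 10;
		return -1;
	}
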
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6df6f8ac9636..75c3582a7678 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -62,22 +62,18 @@ struct ccid_operations {
62 void (*ccid_hc_tx_exit)(struct sock *sk); 62 void (*ccid_hc_tx_exit)(struct sock *sk);
63 void (*ccid_hc_rx_packet_recv)(struct sock *sk, 63 void (*ccid_hc_rx_packet_recv)(struct sock *sk,
64 struct sk_buff *skb); 64 struct sk_buff *skb);
65 int (*ccid_hc_rx_parse_options)(struct sock *sk, 65 int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
66 unsigned char option, 66 u8 opt, u8 *val, u8 len);
67 unsigned char len, u16 idx,
68 unsigned char* value);
69 int (*ccid_hc_rx_insert_options)(struct sock *sk, 67 int (*ccid_hc_rx_insert_options)(struct sock *sk,
70 struct sk_buff *skb); 68 struct sk_buff *skb);
71 void (*ccid_hc_tx_packet_recv)(struct sock *sk, 69 void (*ccid_hc_tx_packet_recv)(struct sock *sk,
72 struct sk_buff *skb); 70 struct sk_buff *skb);
73 int (*ccid_hc_tx_parse_options)(struct sock *sk, 71 int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
74 unsigned char option, 72 u8 opt, u8 *val, u8 len);
75 unsigned char len, u16 idx,
76 unsigned char* value);
77 int (*ccid_hc_tx_send_packet)(struct sock *sk, 73 int (*ccid_hc_tx_send_packet)(struct sock *sk,
78 struct sk_buff *skb); 74 struct sk_buff *skb);
79 void (*ccid_hc_tx_packet_sent)(struct sock *sk, 75 void (*ccid_hc_tx_packet_sent)(struct sock *sk,
80 int more, unsigned int len); 76 unsigned int len);
81 void (*ccid_hc_rx_get_info)(struct sock *sk, 77 void (*ccid_hc_rx_get_info)(struct sock *sk,
82 struct tcp_info *info); 78 struct tcp_info *info);
83 void (*ccid_hc_tx_get_info)(struct sock *sk, 79 void (*ccid_hc_tx_get_info)(struct sock *sk,
@@ -138,20 +134,48 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
138extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); 134extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
139extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); 135extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
140 136
137/*
138 * Congestion control of queued data packets via CCID decision.
139 *
140 * The TX CCID performs its congestion-control by indicating whether and when a
141 * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
142 * The following modes are supported via the symbolic constants below:
143 * - timer-based pacing (CCID returns a delay value in milliseconds);
144 * - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
145 */
146
147enum ccid_dequeueing_decision {
148 CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */
149 CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */
150 CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */
151 CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */
152 CCID_PACKET_ERR = 0xF0000, /* error condition */
153};
154
155static inline int ccid_packet_dequeue_eval(const int return_code)
156{
157 if (return_code < 0)
158 return CCID_PACKET_ERR;
159 if (return_code == 0)
160 return CCID_PACKET_SEND_AT_ONCE;
161 if (return_code <= CCID_PACKET_DELAY_MAX)
162 return CCID_PACKET_DELAY;
163 return return_code;
164}
165
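
The return value of ccid_hc_tx_send_packet() is deliberately overloaded: values in 1..CCID_PACKET_DELAY_MAX are a pacing delay in milliseconds, the flag values above select the other modes, and ccid_packet_dequeue_eval() folds a raw return code onto the symbolic cases. A sketch of how the TX path consumes this (queue handling elided; cf. the dccp_write_xmit() rework in net/dccp/output.c from this same series):

	rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);

	switch (ccid_packet_dequeue_eval(rc)) {
	case CCID_PACKET_WILL_DEQUEUE_LATER:
		return;			/* CCID will kick dccps_xmitlet */
	case CCID_PACKET_DELAY:
		sk_reset_timer(sk, &dp->dccps_xmit_timer,
			       jiffies + msecs_to_jiffies(rc));
		return;			/* re-evaluate after rc msecs */
	case CCID_PACKET_SEND_AT_ONCE:
		/* transmit skb immediately */
		break;
	case CCID_PACKET_ERR:
		/* unqueue and discard skb */
		break;
	}
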
141static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, 166static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
142 struct sk_buff *skb) 167 struct sk_buff *skb)
143{ 168{
144 int rc = 0;
145 if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL) 169 if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
146 rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb); 170 return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
147 return rc; 171 return CCID_PACKET_SEND_AT_ONCE;
148} 172}
149 173
150static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, 174static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
151 int more, unsigned int len) 175 unsigned int len)
152{ 176{
153 if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL) 177 if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
154 ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len); 178 ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
155} 179}
156 180
157static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, 181static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
@@ -168,27 +192,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
168 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb); 192 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
169} 193}
170 194
195/**
196 * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver
197 * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
198 * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
199 * @val: value of @opt
200 * @len: length of @val in bytes
201 */
171static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, 202static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
172 unsigned char option, 203 u8 pkt, u8 opt, u8 *val, u8 len)
173 unsigned char len, u16 idx,
174 unsigned char* value)
175{ 204{
176 int rc = 0; 205 if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
177 if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL) 206 return 0;
178 rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx, 207 return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
179 value);
180 return rc;
181} 208}
182 209
210/**
211 * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender
212 * Arguments are analogous to ccid_hc_tx_parse_options()
213 */
183static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, 214static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
184 unsigned char option, 215 u8 pkt, u8 opt, u8 *val, u8 len)
185 unsigned char len, u16 idx,
186 unsigned char* value)
187{ 216{
188 int rc = 0; 217 if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
189 if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL) 218 return 0;
190 rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value); 219 return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
191 return rc;
192} 220}
193 221
194static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, 222static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44e..0581143cb800 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
47 47
48 If in doubt, say N. 48 If in doubt, say N.
49 49
50config IP_DCCP_CCID3_RTO
51 int "Use higher bound for nofeedback timer"
52 default 100
53 depends on IP_DCCP_CCID3 && EXPERIMENTAL
54 ---help---
55 Use higher lower bound for nofeedback timer expiration.
56
57 The TFRC nofeedback timer normally expires after the maximum of 4
58 RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
59 with a small RTT this can mean a high processing load and reduced
60 performance, since then the nofeedback timer is triggered very
61 frequently.
62
63 This option enables to set a higher lower bound for the nofeedback
64 value. Values in units of milliseconds can be set here.
65
66 A value of 0 disables this feature by enforcing the value specified
67 in RFC 3448. The following values have been suggested as bounds for
68 experimental use:
69 * 16-20ms to match the typical multimedia inter-frame interval
70 * 100ms as a reasonable compromise [default]
71 * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
72
73 The default of 100ms is a compromise between a large value for
74 efficient DCCP implementations, and a small value to avoid disrupting
75 the network in times of congestion.
76
77 The purpose of the nofeedback timer is to slow DCCP down when there
78 is serious network congestion: experimenting with larger values should
79 therefore not be performed on WANs.
80
81config IP_DCCP_TFRC_LIB 50config IP_DCCP_TFRC_LIB
82 def_bool y if IP_DCCP_CCID3 51 def_bool y if IP_DCCP_CCID3
83 52
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..6576eae9e779 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
25 */ 25 */
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include "../feat.h" 27#include "../feat.h"
28#include "../ccid.h"
29#include "../dccp.h"
30#include "ccid2.h" 28#include "ccid2.h"
31 29
32 30
33#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 31#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34static int ccid2_debug; 32static int ccid2_debug;
35#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) 33#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
36
37static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
38{
39 int len = 0;
40 int pipe = 0;
41 struct ccid2_seq *seqp = hc->tx_seqh;
42
43 /* there is data in the chain */
44 if (seqp != hc->tx_seqt) {
45 seqp = seqp->ccid2s_prev;
46 len++;
47 if (!seqp->ccid2s_acked)
48 pipe++;
49
50 while (seqp != hc->tx_seqt) {
51 struct ccid2_seq *prev = seqp->ccid2s_prev;
52
53 len++;
54 if (!prev->ccid2s_acked)
55 pipe++;
56
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
59 prev->ccid2s_seq ) >= 0);
60 BUG_ON(time_before(seqp->ccid2s_sent,
61 prev->ccid2s_sent));
62
63 seqp = prev;
64 }
65 }
66
67 BUG_ON(pipe != hc->tx_pipe);
68 ccid2_pr_debug("len of chain=%d\n", len);
69
70 do {
71 seqp = seqp->ccid2s_prev;
72 len++;
73 } while (seqp != hc->tx_seqh);
74
75 ccid2_pr_debug("total len=%d\n", len);
76 BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
77}
78#else 34#else
79#define ccid2_pr_debug(format, a...) 35#define ccid2_pr_debug(format, a...)
80#define ccid2_hc_tx_check_sanity(hc)
81#endif 36#endif
82 37
83static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) 38static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -123,12 +78,9 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
123 78
124static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) 79static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
125{ 80{
126 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 81 if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
127 82 return CCID_PACKET_WILL_DEQUEUE_LATER;
128 if (hc->tx_pipe < hc->tx_cwnd) 83 return CCID_PACKET_SEND_AT_ONCE;
129 return 0;
130
131 return 1; /* XXX CCID should dequeue when ready instead of polling */
132} 84}
133 85
134static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) 86static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
@@ -156,19 +108,11 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
156 dp->dccps_l_ack_ratio = val; 108 dp->dccps_l_ack_ratio = val;
157} 109}
158 110
159static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
160{
161 ccid2_pr_debug("change SRTT to %ld\n", val);
162 hc->tx_srtt = val;
163}
164
165static void ccid2_start_rto_timer(struct sock *sk);
166
167static void ccid2_hc_tx_rto_expire(unsigned long data) 111static void ccid2_hc_tx_rto_expire(unsigned long data)
168{ 112{
169 struct sock *sk = (struct sock *)data; 113 struct sock *sk = (struct sock *)data;
170 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 114 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
171 long s; 115 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
172 116
173 bh_lock_sock(sk); 117 bh_lock_sock(sk);
174 if (sock_owned_by_user(sk)) { 118 if (sock_owned_by_user(sk)) {
@@ -178,23 +122,17 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
178 122
179 ccid2_pr_debug("RTO_EXPIRE\n"); 123 ccid2_pr_debug("RTO_EXPIRE\n");
180 124
181 ccid2_hc_tx_check_sanity(hc);
182
183 /* back-off timer */ 125 /* back-off timer */
184 hc->tx_rto <<= 1; 126 hc->tx_rto <<= 1;
185 127 if (hc->tx_rto > DCCP_RTO_MAX)
186 s = hc->tx_rto / HZ; 128 hc->tx_rto = DCCP_RTO_MAX;
187 if (s > 60)
188 hc->tx_rto = 60 * HZ;
189
190 ccid2_start_rto_timer(sk);
191 129
192 /* adjust pipe, cwnd etc */ 130 /* adjust pipe, cwnd etc */
193 hc->tx_ssthresh = hc->tx_cwnd / 2; 131 hc->tx_ssthresh = hc->tx_cwnd / 2;
194 if (hc->tx_ssthresh < 2) 132 if (hc->tx_ssthresh < 2)
195 hc->tx_ssthresh = 2; 133 hc->tx_ssthresh = 2;
196 hc->tx_cwnd = 1; 134 hc->tx_cwnd = 1;
197 hc->tx_pipe = 0; 135 hc->tx_pipe = 0;
198 136
199 /* clear state about stuff we sent */ 137 /* clear state about stuff we sent */
200 hc->tx_seqt = hc->tx_seqh; 138 hc->tx_seqt = hc->tx_seqh;
@@ -204,23 +142,18 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
204 hc->tx_rpseq = 0; 142 hc->tx_rpseq = 0;
205 hc->tx_rpdupack = -1; 143 hc->tx_rpdupack = -1;
206 ccid2_change_l_ack_ratio(sk, 1); 144 ccid2_change_l_ack_ratio(sk, 1);
207 ccid2_hc_tx_check_sanity(hc); 145
146 /* if we were blocked before, we may now send cwnd=1 packet */
147 if (sender_was_blocked)
148 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
149 /* restart backed-off timer */
150 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
208out: 151out:
209 bh_unlock_sock(sk); 152 bh_unlock_sock(sk);
210 sock_put(sk); 153 sock_put(sk);
211} 154}
212 155
213static void ccid2_start_rto_timer(struct sock *sk) 156static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
214{
215 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
218
219 BUG_ON(timer_pending(&hc->tx_rtotimer));
220 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
221}
222
223static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
224{ 157{
225 struct dccp_sock *dp = dccp_sk(sk); 158 struct dccp_sock *dp = dccp_sk(sk);
226 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 159 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
@@ -230,7 +163,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
230 163
231 hc->tx_seqh->ccid2s_seq = dp->dccps_gss; 164 hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
232 hc->tx_seqh->ccid2s_acked = 0; 165 hc->tx_seqh->ccid2s_acked = 0;
233 hc->tx_seqh->ccid2s_sent = jiffies; 166 hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
234 167
235 next = hc->tx_seqh->ccid2s_next; 168 next = hc->tx_seqh->ccid2s_next;
236 /* check if we need to alloc more space */ 169 /* check if we need to alloc more space */
@@ -296,23 +229,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
296 } 229 }
297#endif 230#endif
298 231
299 /* setup RTO timer */ 232 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
300 if (!timer_pending(&hc->tx_rtotimer))
301 ccid2_start_rto_timer(sk);
302 233
303#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 234#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
304 do { 235 do {
305 struct ccid2_seq *seqp = hc->tx_seqt; 236 struct ccid2_seq *seqp = hc->tx_seqt;
306 237
307 while (seqp != hc->tx_seqh) { 238 while (seqp != hc->tx_seqh) {
308 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", 239 ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
309 (unsigned long long)seqp->ccid2s_seq, 240 (unsigned long long)seqp->ccid2s_seq,
310 seqp->ccid2s_acked, seqp->ccid2s_sent); 241 seqp->ccid2s_acked, seqp->ccid2s_sent);
311 seqp = seqp->ccid2s_next; 242 seqp = seqp->ccid2s_next;
312 } 243 }
313 } while (0); 244 } while (0);
314 ccid2_pr_debug("=========\n"); 245 ccid2_pr_debug("=========\n");
315 ccid2_hc_tx_check_sanity(hc);
316#endif 246#endif
317} 247}
318 248
@@ -378,17 +308,87 @@ out_invalid_option:
378 return -1; 308 return -1;
379} 309}
380 310
381static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) 311/**
312 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
313 * This code is almost identical with TCP's tcp_rtt_estimator(), since
314 * - it has a higher sampling frequency (recommended by RFC 1323),
315 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
316 * - it is simple (cf. more complex proposals such as Eifel timer or research
317 * which suggests that the gain should be set according to window size),
318 * - in tests it was found to work well with CCID2 [gerrit].
319 */
320static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
382{ 321{
383 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 322 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
323 long m = mrtt ? : 1;
384 324
385 sk_stop_timer(sk, &hc->tx_rtotimer); 325 if (hc->tx_srtt == 0) {
386 ccid2_pr_debug("deleted RTO timer\n"); 326 /* First measurement m */
327 hc->tx_srtt = m << 3;
328 hc->tx_mdev = m << 1;
329
330 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
331 hc->tx_rttvar = hc->tx_mdev_max;
332
333 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
334 } else {
335 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
336 m -= (hc->tx_srtt >> 3);
337 hc->tx_srtt += m;
338
339 /* Similarly, update scaled mdev with regard to |m| */
340 if (m < 0) {
341 m = -m;
342 m -= (hc->tx_mdev >> 2);
343 /*
344 * This neutralises RTO increase when RTT < SRTT - mdev
345 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
346 * in Linux TCP", USENIX 2002, pp. 49-62).
347 */
348 if (m > 0)
349 m >>= 3;
350 } else {
351 m -= (hc->tx_mdev >> 2);
352 }
353 hc->tx_mdev += m;
354
355 if (hc->tx_mdev > hc->tx_mdev_max) {
356 hc->tx_mdev_max = hc->tx_mdev;
357 if (hc->tx_mdev_max > hc->tx_rttvar)
358 hc->tx_rttvar = hc->tx_mdev_max;
359 }
360
361 /*
362 * Decay RTTVAR at most once per flight, exploiting that
363 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
364 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
365 * GAR is a useful bound for FlightSize = pipe.
366 * AWL is probably too low here, as it over-estimates pipe.
367 */
368 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
369 if (hc->tx_mdev_max < hc->tx_rttvar)
370 hc->tx_rttvar -= (hc->tx_rttvar -
371 hc->tx_mdev_max) >> 2;
372 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
373 hc->tx_mdev_max = tcp_rto_min(sk);
374 }
375 }
376
377 /*
378 * Set RTO from SRTT and RTTVAR
379 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
380 * This agrees with RFC 4341, 5:
381 * "Because DCCP does not retransmit data, DCCP does not require
382 * TCP's recommended minimum timeout of one second".
383 */
384 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
385
386 if (hc->tx_rto > DCCP_RTO_MAX)
387 hc->tx_rto = DCCP_RTO_MAX;
387} 388}
388 389
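
This is RFC 2988, section 2, with TCP's fixed-point scaling: tx_srtt stores 8*SRTT, while tx_mdev and tx_rttvar store 4 times their nominal values. In unscaled terms each sample R' performs

	SRTT   <- 7/8 * SRTT   + 1/8 * R'
	RTTVAR <- 3/4 * RTTVAR + 1/4 * |SRTT - R'|
	RTO     = SRTT + 4 * RTTVAR	(clamped to DCCP_RTO_MAX)

so the closing line, tx_rto = (tx_srtt >> 3) + tx_rttvar, is exactly SRTT + 4*RTTVAR once the scaling is unwound.
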
389static inline void ccid2_new_ack(struct sock *sk, 390static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
390 struct ccid2_seq *seqp, 391 unsigned int *maxincr)
391 unsigned int *maxincr)
392{ 392{
393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
394 394
@@ -402,93 +402,27 @@ static inline void ccid2_new_ack(struct sock *sk,
402 hc->tx_cwnd += 1; 402 hc->tx_cwnd += 1;
403 hc->tx_packets_acked = 0; 403 hc->tx_packets_acked = 0;
404 } 404 }
405 405 /*
406 /* update RTO */ 406 * FIXME: RTT is sampled several times per acknowledgment (for each
407 if (hc->tx_srtt == -1 || 407 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
408 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 408 * This causes the RTT to be over-estimated, since the older entries
409 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 409 * in the Ack Vector have earlier sending times.
410 int s; 410 * The cleanest solution is to not use the ccid2s_sent field at all
411 411 * and instead use DCCP timestamps: requires changes in other places.
412 /* first measurement */ 412 */
413 if (hc->tx_srtt == -1) { 413 ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
414 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
415 r, jiffies,
416 (unsigned long long)seqp->ccid2s_seq);
417 ccid2_change_srtt(hc, r);
418 hc->tx_rttvar = r >> 1;
419 } else {
420 /* RTTVAR */
421 long tmp = hc->tx_srtt - r;
422 long srtt;
423
424 if (tmp < 0)
425 tmp *= -1;
426
427 tmp >>= 2;
428 hc->tx_rttvar *= 3;
429 hc->tx_rttvar >>= 2;
430 hc->tx_rttvar += tmp;
431
432 /* SRTT */
433 srtt = hc->tx_srtt;
434 srtt *= 7;
435 srtt >>= 3;
436 tmp = r >> 3;
437 srtt += tmp;
438 ccid2_change_srtt(hc, srtt);
439 }
440 s = hc->tx_rttvar << 2;
441 /* clock granularity is 1 when based on jiffies */
442 if (!s)
443 s = 1;
444 hc->tx_rto = hc->tx_srtt + s;
445
446 /* must be at least a second */
447 s = hc->tx_rto / HZ;
448 /* DCCP doesn't require this [but I like it cuz my code sux] */
449#if 1
450 if (s < 1)
451 hc->tx_rto = HZ;
452#endif
453 /* max 60 seconds */
454 if (s > 60)
455 hc->tx_rto = HZ * 60;
456
457 hc->tx_lastrtt = jiffies;
458
459 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
460 hc->tx_srtt, hc->tx_rttvar,
461 hc->tx_rto, HZ, r);
462 }
463
464 /* we got a new ack, so re-start RTO timer */
465 ccid2_hc_tx_kill_rto_timer(sk);
466 ccid2_start_rto_timer(sk);
467}
468
469static void ccid2_hc_tx_dec_pipe(struct sock *sk)
470{
471 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
472
473 if (hc->tx_pipe == 0)
474 DCCP_BUG("pipe == 0");
475 else
476 hc->tx_pipe--;
477
478 if (hc->tx_pipe == 0)
479 ccid2_hc_tx_kill_rto_timer(sk);
480} 414}
481 415
482static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 416static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
483{ 417{
484 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 418 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
485 419
486 if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { 420 if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
487 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 421 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
488 return; 422 return;
489 } 423 }
490 424
491 hc->tx_last_cong = jiffies; 425 hc->tx_last_cong = ccid2_time_stamp;
492 426
493 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 427 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
494 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 428 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
@@ -502,6 +436,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
502{ 436{
503 struct dccp_sock *dp = dccp_sk(sk); 437 struct dccp_sock *dp = dccp_sk(sk);
504 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 438 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
439 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
505 u64 ackno, seqno; 440 u64 ackno, seqno;
506 struct ccid2_seq *seqp; 441 struct ccid2_seq *seqp;
507 unsigned char *vector; 442 unsigned char *vector;
@@ -510,7 +445,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
510 int done = 0; 445 int done = 0;
511 unsigned int maxincr = 0; 446 unsigned int maxincr = 0;
512 447
513 ccid2_hc_tx_check_sanity(hc);
514 /* check reverse path congestion */ 448 /* check reverse path congestion */
515 seqno = DCCP_SKB_CB(skb)->dccpd_seq; 449 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 450
@@ -620,7 +554,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
620 seqp->ccid2s_acked = 1; 554 seqp->ccid2s_acked = 1;
621 ccid2_pr_debug("Got ack for %llu\n", 555 ccid2_pr_debug("Got ack for %llu\n",
622 (unsigned long long)seqp->ccid2s_seq); 556 (unsigned long long)seqp->ccid2s_seq);
623 ccid2_hc_tx_dec_pipe(sk); 557 hc->tx_pipe--;
624 } 558 }
625 if (seqp == hc->tx_seqt) { 559 if (seqp == hc->tx_seqt) {
626 done = 1; 560 done = 1;
@@ -677,7 +611,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
677 * one ack vector. 611 * one ack vector.
678 */ 612 */
679 ccid2_congestion_event(sk, seqp); 613 ccid2_congestion_event(sk, seqp);
680 ccid2_hc_tx_dec_pipe(sk); 614 hc->tx_pipe--;
681 } 615 }
682 if (seqp == hc->tx_seqt) 616 if (seqp == hc->tx_seqt)
683 break; 617 break;
@@ -695,7 +629,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
695 hc->tx_seqt = hc->tx_seqt->ccid2s_next; 629 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696 } 630 }
697 631
698 ccid2_hc_tx_check_sanity(hc); 632 /* restart RTO timer if not all outstanding data has been acked */
633 if (hc->tx_pipe == 0)
634 sk_stop_timer(sk, &hc->tx_rtotimer);
635 else
636 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
637
638 /* check if incoming Acks allow pending packets to be sent */
639 if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
640 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
699} 641}
700 642
701static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 643static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +649,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
707 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ 649 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
708 hc->tx_ssthresh = ~0U; 650 hc->tx_ssthresh = ~0U;
709 651
710 /* 652 /* Use larger initial windows (RFC 4341, section 5). */
711 * RFC 4341, 5: "The cwnd parameter is initialized to at most four 653 hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
712 * packets for new connections, following the rules from [RFC3390]".
713 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
714 */
715 hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
716 654
717 /* Make sure that Ack Ratio is enabled and within bounds. */ 655 /* Make sure that Ack Ratio is enabled and within bounds. */
718 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); 656 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
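
rfc3390_bytes_to_packets() converts RFC 3390's byte-denominated initial window, min(4*MSS, max(2*MSS, 4380 bytes)), into a packet count; the helper in net/dccp/dccp.h boils down to:

	static inline u32 rfc3390_bytes_to_packets(const u32 smss)
	{
		return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
	}

For the common MSS of 1460 bytes this yields 3 packets, the same result as the removed clamp(4380/1460, 2, 4), with the byte budget of RFC 3390 now respected at the boundaries.
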
@@ -723,15 +661,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
723 if (ccid2_hc_tx_alloc_seq(hc)) 661 if (ccid2_hc_tx_alloc_seq(hc))
724 return -ENOMEM; 662 return -ENOMEM;
725 663
726 hc->tx_rto = 3 * HZ; 664 hc->tx_rto = DCCP_TIMEOUT_INIT;
727 ccid2_change_srtt(hc, -1);
728 hc->tx_rttvar = -1;
729 hc->tx_rpdupack = -1; 665 hc->tx_rpdupack = -1;
730 hc->tx_last_cong = jiffies; 666 hc->tx_last_cong = ccid2_time_stamp;
731 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 667 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
732 (unsigned long)sk); 668 (unsigned long)sk);
733
734 ccid2_hc_tx_check_sanity(hc);
735 return 0; 669 return 0;
736} 670}
737 671
@@ -740,7 +674,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
740 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 674 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
741 int i; 675 int i;
742 676
743 ccid2_hc_tx_kill_rto_timer(sk); 677 sk_stop_timer(sk, &hc->tx_rtotimer);
744 678
745 for (i = 0; i < hc->tx_seqbufc; i++) 679 for (i = 0; i < hc->tx_seqbufc; i++)
746 kfree(hc->tx_seqbuf[i]); 680 kfree(hc->tx_seqbuf[i]);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103bb..25cb6b216eda 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
18#ifndef _DCCP_CCID2_H_ 18#ifndef _DCCP_CCID2_H_
19#define _DCCP_CCID2_H_ 19#define _DCCP_CCID2_H_
20 20
21#include <linux/dccp.h>
22#include <linux/timer.h> 21#include <linux/timer.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include "../ccid.h" 23#include "../ccid.h"
24#include "../dccp.h"
25
26/*
27 * CCID-2 timestamping faces the same issues as TCP timestamping.
28 * Hence we reuse/share as much of the code as possible.
29 */
30#define ccid2_time_stamp tcp_time_stamp
31
25/* NUMDUPACK parameter from RFC 4341, p. 6 */ 32/* NUMDUPACK parameter from RFC 4341, p. 6 */
26#define NUMDUPACK 3 33#define NUMDUPACK 3
27 34
28struct sock;
29
30struct ccid2_seq { 35struct ccid2_seq {
31 u64 ccid2s_seq; 36 u64 ccid2s_seq;
32 unsigned long ccid2s_sent; 37 u32 ccid2s_sent;
33 int ccid2s_acked; 38 int ccid2s_acked;
34 struct ccid2_seq *ccid2s_prev; 39 struct ccid2_seq *ccid2s_prev;
35 struct ccid2_seq *ccid2s_next; 40 struct ccid2_seq *ccid2s_next;
@@ -42,7 +47,12 @@ struct ccid2_seq {
42 * struct ccid2_hc_tx_sock - CCID2 TX half connection 47 * struct ccid2_hc_tx_sock - CCID2 TX half connection
43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 48 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) 49 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
45 * @tx_lastrtt: time RTT was last measured 50 * @tx_srtt: smoothed RTT estimate, scaled by 2^3
51 * @tx_mdev: smoothed RTT variation, scaled by 2^2
52 * @tx_mdev_max: maximum of @mdev during one flight
53 * @tx_rttvar: moving average/maximum of @mdev_max
54 * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
55 * @tx_rtt_seq: to decay RTTVAR at most once per flight
46 * @tx_rpseq: last consecutive seqno 56 * @tx_rpseq: last consecutive seqno
47 * @tx_rpdupack: dupacks since rpseq 57 * @tx_rpdupack: dupacks since rpseq
48 */ 58 */
@@ -55,17 +65,27 @@ struct ccid2_hc_tx_sock {
55 int tx_seqbufc; 65 int tx_seqbufc;
56 struct ccid2_seq *tx_seqh; 66 struct ccid2_seq *tx_seqh;
57 struct ccid2_seq *tx_seqt; 67 struct ccid2_seq *tx_seqt;
58 long tx_rto; 68
59 long tx_srtt; 69 /* RTT measurement: variables/principles are the same as in TCP */
60 long tx_rttvar; 70 u32 tx_srtt,
61 unsigned long tx_lastrtt; 71 tx_mdev,
72 tx_mdev_max,
73 tx_rttvar,
74 tx_rto;
75 u64 tx_rtt_seq:48;
62 struct timer_list tx_rtotimer; 76 struct timer_list tx_rtotimer;
77
63 u64 tx_rpseq; 78 u64 tx_rpseq;
64 int tx_rpdupack; 79 int tx_rpdupack;
65 unsigned long tx_last_cong; 80 u32 tx_last_cong;
66 u64 tx_high_ack; 81 u64 tx_high_ack;
67}; 82};
68 83
84static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
85{
86 return hc->tx_pipe >= hc->tx_cwnd;
87}
88
69struct ccid2_hc_rx_sock { 89struct ccid2_hc_rx_sock {
70 int rx_data; 90 int rx_data;
71}; 91};
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95f752986497..3d604e1349c0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
54 [TFRC_SSTATE_NO_SENT] = "NO_SENT", 54 [TFRC_SSTATE_NO_SENT] = "NO_SENT",
55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", 55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
56 [TFRC_SSTATE_FBACK] = "FBACK", 56 [TFRC_SSTATE_FBACK] = "FBACK",
57 [TFRC_SSTATE_TERM] = "TERM",
58 }; 57 };
59 58
60 return ccid3_state_names[state]; 59 return ccid3_state_names[state];
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
91 return scaled_div(w_init << 6, hc->tx_rtt); 90 return scaled_div(w_init << 6, hc->tx_rtt);
92} 91}
93 92
94/* 93/**
95 * Recalculate t_ipi and delta (should be called whenever X changes) 94 * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
95 * This respects the granularity of X_inst (64 * bytes/second).
96 */ 96 */
97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) 97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
98{ 98{
99 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
100 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); 99 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
101 100
102 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 101 ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
103 hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); 102 hc->tx_s, (unsigned)(hc->tx_x >> 6));
104
105 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi,
106 hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6));
107} 103}
108 104
109static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) 105static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
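
Since X is kept in units of 64 bytes/second and scaled_div32() computes (a * 10^6) / b (per its definition in tfrc.h), the inter-packet interval comes out directly in microseconds:

	t_ipi = ((s << 6) * 10^6) / X = s * 10^6 / rate	[rate in bytes/s]

	e.g. s = 1460 bytes at 1 Mbit/s (125 000 bytes/s):
	t_ipi = 1460 * 10^6 / 125 000 = 11 680 us, i.e. ~85 packets/s
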
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
211 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, 207 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
212 ccid3_tx_state_name(hc->tx_state)); 208 ccid3_tx_state_name(hc->tx_state));
213 209
210 /* Ignore and do not restart after leaving the established state */
211 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
212 goto out;
213
214 /* Reset feedback state to "no feedback received" */
214 if (hc->tx_state == TFRC_SSTATE_FBACK) 215 if (hc->tx_state == TFRC_SSTATE_FBACK)
215 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 216 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
216 else if (hc->tx_state != TFRC_SSTATE_NO_FBACK)
217 goto out;
218 217
219 /* 218 /*
220 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 219 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
220 * RTO is 0 if and only if no feedback has been received yet.
221 */ 221 */
222 if (hc->tx_t_rto == 0 || /* no feedback received yet */ 222 if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
223 hc->tx_p == 0) {
224 223
225 /* halve send rate directly */ 224 /* halve send rate directly */
226 hc->tx_x = max(hc->tx_x / 2, 225 hc->tx_x = max(hc->tx_x / 2,
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 * Set new timeout for the nofeedback timer. 255 * Set new timeout for the nofeedback timer.
257 * See comments in packet_recv() regarding the value of t_RTO. 256 * See comments in packet_recv() regarding the value of t_RTO.
258 */ 257 */
259 if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ 258 if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
260 t_nfb = TFRC_INITIAL_TIMEOUT; 259 t_nfb = TFRC_INITIAL_TIMEOUT;
261 else 260 else
262 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); 261 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
@@ -269,11 +268,11 @@ out:
269 sock_put(sk); 268 sock_put(sk);
270} 269}
271 270
272/* 271/**
273 * returns 272 * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
274 * > 0: delay (in msecs) that should pass before actually sending 273 * @skb: next packet candidate to send on @sk
275 * = 0: can send immediately 274 * This function uses the convention of ccid_packet_dequeue_eval() and
276 * < 0: error condition; do not send packet 275 * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
277 */ 276 */
278static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) 277static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
279{ 278{
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
290 if (unlikely(skb->len == 0)) 289 if (unlikely(skb->len == 0))
291 return -EBADMSG; 290 return -EBADMSG;
292 291
293 switch (hc->tx_state) { 292 if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
294 case TFRC_SSTATE_NO_SENT:
295 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + 293 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
296 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); 294 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
297 hc->tx_last_win_count = 0; 295 hc->tx_last_win_count = 0;
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
326 ccid3_update_send_interval(hc); 324 ccid3_update_send_interval(hc);
327 325
328 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 326 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
329 break; 327
330 case TFRC_SSTATE_NO_FBACK: 328 } else {
331 case TFRC_SSTATE_FBACK:
332 delay = ktime_us_delta(hc->tx_t_nom, now); 329 delay = ktime_us_delta(hc->tx_t_nom, now);
333 ccid3_pr_debug("delay=%ld\n", (long)delay); 330 ccid3_pr_debug("delay=%ld\n", (long)delay);
334 /* 331 /*
335 * Scheduling of packet transmissions [RFC 3448, 4.6] 332 * Scheduling of packet transmissions (RFC 5348, 8.3)
336 * 333 *
337 * if (t_now > t_nom - delta) 334 * if (t_now > t_nom - delta)
338 * // send the packet now 335 * // send the packet now
339 * else 336 * else
340 * // send the packet in (t_nom - t_now) milliseconds. 337 * // send the packet in (t_nom - t_now) milliseconds.
341 */ 338 */
342 if (delay - (s64)hc->tx_delta >= 1000) 339 if (delay >= TFRC_T_DELTA)
343 return (u32)delay / 1000L; 340 return (u32)delay / USEC_PER_MSEC;
344 341
345 ccid3_hc_tx_update_win_count(hc, now); 342 ccid3_hc_tx_update_win_count(hc, now);
346 break;
347 case TFRC_SSTATE_TERM:
348 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
349 return -EINVAL;
350 } 343 }
351 344
352 /* prepare to send now (add options etc.) */ 345 /* prepare to send now (add options etc.) */
@@ -355,11 +348,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
355 348
356 /* set the nominal send time for the next following packet */ 349 /* set the nominal send time for the next following packet */
357 hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi); 350 hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
358 return 0; 351 return CCID_PACKET_SEND_AT_ONCE;
359} 352}
360 353
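
With the TERM state gone, the send decision reduces to two cases: before anything is sent, arm the initial nofeedback timer; afterwards, compare the time left until the nominal send time t_nom against TFRC_T_DELTA (the millisecond scheduling granularity used by this series). If, say, delay = t_nom - t_now = 5 400 us, the function returns 5, which ccid_packet_dequeue_eval() classifies as CCID_PACKET_DELAY; a delay of 600 us falls under the granularity, so the packet goes out at once and t_nom advances by t_ipi.
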
361static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, 354static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
362 unsigned int len)
363{ 355{
364 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 356 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
365 357
@@ -372,48 +364,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
372static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 364static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
373{ 365{
374 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 366 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
375 struct ccid3_options_received *opt_recv; 367 struct tfrc_tx_hist_entry *acked;
376 ktime_t now; 368 ktime_t now;
377 unsigned long t_nfb; 369 unsigned long t_nfb;
378 u32 pinv, r_sample; 370 u32 r_sample;
379 371
380 /* we are only interested in ACKs */ 372 /* we are only interested in ACKs */
381 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || 373 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
382 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) 374 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
383 return; 375 return;
384 /* ... and only in the established state */
385 if (hc->tx_state != TFRC_SSTATE_FBACK &&
386 hc->tx_state != TFRC_SSTATE_NO_FBACK)
387 return;
388
389 opt_recv = &hc->tx_options_received;
390 now = ktime_get_real();
391
392 /* Estimate RTT from history if ACK number is valid */
393 r_sample = tfrc_tx_hist_rtt(hc->tx_hist,
394 DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
395 if (r_sample == 0) {
396 DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
397 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
398 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
399 return;
400 }
401
402 /* Update receive rate in units of 64 * bytes/second */
403 hc->tx_x_recv = opt_recv->ccid3or_receive_rate;
404 hc->tx_x_recv <<= 6;
405
406 /* Update loss event rate (which is scaled by 1e6) */
407 pinv = opt_recv->ccid3or_loss_event_rate;
408 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
409 hc->tx_p = 0;
410 else /* can not exceed 100% */
411 hc->tx_p = scaled_div(1, pinv);
412 /* 376 /*
413 * Validate new RTT sample and update moving average 377 * Locate the acknowledged packet in the TX history.
378 *
379 * Returning "entry not found" here can for instance happen when
380 * - the host has not sent out anything (e.g. a passive server),
381 * - the Ack is outdated (packet with higher Ack number was received),
382 * - it is a bogus Ack (for a packet not sent on this connection).
414 */ 383 */
415 r_sample = dccp_sample_rtt(sk, r_sample); 384 acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
385 if (acked == NULL)
386 return;
387 /* For the sake of RTT sampling, ignore/remove all older entries */
388 tfrc_tx_hist_purge(&acked->next);
389
390 /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
391 now = ktime_get_real();
392 r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
416 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); 393 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);
394
417 /* 395 /*
418 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 396 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
419 */ 397 */
@@ -461,13 +439,12 @@ done_computing_x:
461 sk->sk_write_space(sk); 439 sk->sk_write_space(sk);
462 440
463 /* 441 /*
464 * Update timeout interval for the nofeedback timer. 442 * Update timeout interval for the nofeedback timer. In order to control
465 * We use a configuration option to increase the lower bound. 443 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
466 * This can help avoid triggering the nofeedback timer too 444 * tunable RTAX_RTO_MIN value as the lower bound.
467 * often ('spinning') on LANs with small RTTs.
468 */ 445 */
469 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * 446 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
470 (USEC_PER_SEC / 1000))); 447 USEC_PER_SEC/HZ * tcp_rto_min(sk));
471 /* 448 /*
472 * Schedule no feedback timer to expire in 449 * Schedule no feedback timer to expire in
473 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) 450 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -482,66 +459,41 @@ done_computing_x:
482 jiffies + usecs_to_jiffies(t_nfb)); 459 jiffies + usecs_to_jiffies(t_nfb));
483} 460}
484 461
485static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 462static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
486 unsigned char len, u16 idx, 463 u8 option, u8 *optval, u8 optlen)
487 unsigned char *value)
488{ 464{
489 int rc = 0;
490 const struct dccp_sock *dp = dccp_sk(sk);
491 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 465 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
492 struct ccid3_options_received *opt_recv;
493 __be32 opt_val; 466 __be32 opt_val;
494 467
495 opt_recv = &hc->tx_options_received;
496
497 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
498 opt_recv->ccid3or_seqno = dp->dccps_gsr;
499 opt_recv->ccid3or_loss_event_rate = ~0;
500 opt_recv->ccid3or_loss_intervals_idx = 0;
501 opt_recv->ccid3or_loss_intervals_len = 0;
502 opt_recv->ccid3or_receive_rate = 0;
503 }
504
505 switch (option) { 468 switch (option) {
469 case TFRC_OPT_RECEIVE_RATE:
506 case TFRC_OPT_LOSS_EVENT_RATE: 470 case TFRC_OPT_LOSS_EVENT_RATE:
507 if (unlikely(len != 4)) { 471 /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
508 DCCP_WARN("%s(%p), invalid len %d " 472 if (packet_type == DCCP_PKT_DATA)
509 "for TFRC_OPT_LOSS_EVENT_RATE\n", 473 break;
510 dccp_role(sk), sk, len); 474 if (unlikely(optlen != 4)) {
511 rc = -EINVAL; 475 DCCP_WARN("%s(%p), invalid len %d for %u\n",
512 } else { 476 dccp_role(sk), sk, optlen, option);
513 opt_val = get_unaligned((__be32 *)value); 477 return -EINVAL;
514 opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
515 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
516 dccp_role(sk), sk,
517 opt_recv->ccid3or_loss_event_rate);
518 } 478 }
519 break; 479 opt_val = ntohl(get_unaligned((__be32 *)optval));
520 case TFRC_OPT_LOSS_INTERVALS: 480
521 opt_recv->ccid3or_loss_intervals_idx = idx; 481 if (option == TFRC_OPT_RECEIVE_RATE) {
522 opt_recv->ccid3or_loss_intervals_len = len; 482 /* Receive Rate is kept in units of 64 bytes/second */
523 ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", 483 hc->tx_x_recv = opt_val;
524 dccp_role(sk), sk, 484 hc->tx_x_recv <<= 6;
525 opt_recv->ccid3or_loss_intervals_idx, 485
526 opt_recv->ccid3or_loss_intervals_len);
527 break;
528 case TFRC_OPT_RECEIVE_RATE:
529 if (unlikely(len != 4)) {
530 DCCP_WARN("%s(%p), invalid len %d "
531 "for TFRC_OPT_RECEIVE_RATE\n",
532 dccp_role(sk), sk, len);
533 rc = -EINVAL;
534 } else {
535 opt_val = get_unaligned((__be32 *)value);
536 opt_recv->ccid3or_receive_rate = ntohl(opt_val);
537 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", 486 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
538 dccp_role(sk), sk, 487 dccp_role(sk), sk, opt_val);
539 opt_recv->ccid3or_receive_rate); 488 } else {
489 /* Update the fixpoint Loss Event Rate fraction */
490 hc->tx_p = tfrc_invert_loss_event_rate(opt_val);
491
492 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
493 dccp_role(sk), sk, opt_val);
540 } 494 }
541 break;
542 } 495 }
543 496 return 0;
544 return rc;
545} 497}
546 498
547static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) 499static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -559,42 +511,36 @@ static void ccid3_hc_tx_exit(struct sock *sk)
559{ 511{
560 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 512 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
561 513
562 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
563 sk_stop_timer(sk, &hc->tx_no_feedback_timer); 514 sk_stop_timer(sk, &hc->tx_no_feedback_timer);
564
565 tfrc_tx_hist_purge(&hc->tx_hist); 515 tfrc_tx_hist_purge(&hc->tx_hist);
566} 516}
567 517
568static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 518static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
569{ 519{
570 struct ccid3_hc_tx_sock *hc; 520 info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
571 521 info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
572 /* Listen socks doesn't have a private CCID block */
573 if (sk->sk_state == DCCP_LISTEN)
574 return;
575
576 hc = ccid3_hc_tx_sk(sk);
577 info->tcpi_rto = hc->tx_t_rto;
578 info->tcpi_rtt = hc->tx_rtt;
579} 522}
580 523
581static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, 524static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
582 u32 __user *optval, int __user *optlen) 525 u32 __user *optval, int __user *optlen)
583{ 526{
584 const struct ccid3_hc_tx_sock *hc; 527 const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
528 struct tfrc_tx_info tfrc;
585 const void *val; 529 const void *val;
586 530
587 /* Listen socks doesn't have a private CCID block */
588 if (sk->sk_state == DCCP_LISTEN)
589 return -EINVAL;
590
591 hc = ccid3_hc_tx_sk(sk);
592 switch (optname) { 531 switch (optname) {
593 case DCCP_SOCKOPT_CCID_TX_INFO: 532 case DCCP_SOCKOPT_CCID_TX_INFO:
594 if (len < sizeof(hc->tx_tfrc)) 533 if (len < sizeof(tfrc))
595 return -EINVAL; 534 return -EINVAL;
596 len = sizeof(hc->tx_tfrc); 535 tfrc.tfrctx_x = hc->tx_x;
597 val = &hc->tx_tfrc; 536 tfrc.tfrctx_x_recv = hc->tx_x_recv;
537 tfrc.tfrctx_x_calc = hc->tx_x_calc;
538 tfrc.tfrctx_rtt = hc->tx_rtt;
539 tfrc.tfrctx_p = hc->tx_p;
540 tfrc.tfrctx_rto = hc->tx_t_rto;
541 tfrc.tfrctx_ipi = hc->tx_t_ipi;
542 len = sizeof(tfrc);
543 val = &tfrc;
598 break; 544 break;
599 default: 545 default:
600 return -ENOPROTOOPT; 546 return -ENOPROTOOPT;
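With the tx_tfrc member and its #define aliases gone, the getsockopt handler assembles a struct tfrc_tx_info snapshot on the stack from the individual fields before handing it to the caller, rather than exposing a live pointer into socket state. A hedged sketch of that snapshot pattern; the struct layouts below are modelled on the hunk, not copied from the kernel headers:

	#include <stdint.h>

	struct tfrc_tx_info_ex {	/* illustrative stand-in for struct tfrc_tx_info */
		uint64_t tfrctx_x, tfrctx_x_recv;
		uint32_t tfrctx_x_calc, tfrctx_rtt, tfrctx_p, tfrctx_rto, tfrctx_ipi;
	};

	struct hc_tx_state {		/* illustrative stand-in for ccid3_hc_tx_sock */
		uint64_t tx_x, tx_x_recv;
		uint32_t tx_x_calc, tx_rtt, tx_p, tx_t_rto, tx_t_ipi;
	};

	/* Copy live state into a caller-owned buffer: the caller never
	 * receives a pointer into the socket, so the internal layout stays
	 * private and the copy cannot change under the reader's feet. */
	static void tx_info_snapshot(const struct hc_tx_state *hc,
				     struct tfrc_tx_info_ex *out)
	{
		out->tfrctx_x      = hc->tx_x;
		out->tfrctx_x_recv = hc->tx_x_recv;
		out->tfrctx_x_calc = hc->tx_x_calc;
		out->tfrctx_rtt    = hc->tx_rtt;
		out->tfrctx_p      = hc->tx_p;
		out->tfrctx_rto    = hc->tx_t_rto;
		out->tfrctx_ipi    = hc->tx_t_ipi;
	}

	int main(void)
	{
		struct hc_tx_state hc = { .tx_x = 1 << 20, .tx_rtt = 100000 };
		struct tfrc_tx_info_ex info;

		tx_info_snapshot(&hc, &info);
		return info.tfrctx_rtt == hc.tx_rtt ? 0 : 1;
	}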
@@ -624,7 +570,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
624 static const char *const ccid3_rx_state_names[] = { 570 static const char *const ccid3_rx_state_names[] = {
625 [TFRC_RSTATE_NO_DATA] = "NO_DATA", 571 [TFRC_RSTATE_NO_DATA] = "NO_DATA",
626 [TFRC_RSTATE_DATA] = "DATA", 572 [TFRC_RSTATE_DATA] = "DATA",
627 [TFRC_RSTATE_TERM] = "TERM",
628 }; 573 };
629 574
630 return ccid3_rx_state_names[state]; 575 return ccid3_rx_state_names[state];
@@ -650,14 +595,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
650{ 595{
651 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 596 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
652 struct dccp_sock *dp = dccp_sk(sk); 597 struct dccp_sock *dp = dccp_sk(sk);
653 ktime_t now; 598 ktime_t now = ktime_get_real();
654 s64 delta = 0; 599 s64 delta = 0;
655 600
656 if (unlikely(hc->rx_state == TFRC_RSTATE_TERM))
657 return;
658
659 now = ktime_get_real();
660
661 switch (fbtype) { 601 switch (fbtype) {
662 case CCID3_FBACK_INITIAL: 602 case CCID3_FBACK_INITIAL:
663 hc->rx_x_recv = 0; 603 hc->rx_x_recv = 0;
@@ -701,14 +641,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
701 641
702static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 642static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
703{ 643{
704 const struct ccid3_hc_rx_sock *hc; 644 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
705 __be32 x_recv, pinv; 645 __be32 x_recv, pinv;
706 646
707 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 647 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
708 return 0; 648 return 0;
709 649
710 hc = ccid3_hc_rx_sk(sk);
711
712 if (dccp_packet_without_ack(skb)) 650 if (dccp_packet_without_ack(skb))
713 return 0; 651 return 0;
714 652
@@ -749,10 +687,11 @@ static u32 ccid3_first_li(struct sock *sk)
749 x_recv = scaled_div32(hc->rx_bytes_recv, delta); 687 x_recv = scaled_div32(hc->rx_bytes_recv, delta);
750 if (x_recv == 0) { /* would also trigger divide-by-zero */ 688 if (x_recv == 0) { /* would also trigger divide-by-zero */
751 DCCP_WARN("X_recv==0\n"); 689 DCCP_WARN("X_recv==0\n");
752 if ((x_recv = hc->rx_x_recv) == 0) { 690 if (hc->rx_x_recv == 0) {
753 DCCP_BUG("stored value of X_recv is zero"); 691 DCCP_BUG("stored value of X_recv is zero");
754 return ~0U; 692 return ~0U;
755 } 693 }
694 x_recv = hc->rx_x_recv;
756 } 695 }
757 696
758 fval = scaled_div(hc->rx_s, hc->rx_rtt); 697 fval = scaled_div(hc->rx_s, hc->rx_rtt);
@@ -862,46 +801,31 @@ static void ccid3_hc_rx_exit(struct sock *sk)
862{ 801{
863 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 802 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
864 803
865 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
866
867 tfrc_rx_hist_purge(&hc->rx_hist); 804 tfrc_rx_hist_purge(&hc->rx_hist);
868 tfrc_lh_cleanup(&hc->rx_li_hist); 805 tfrc_lh_cleanup(&hc->rx_li_hist);
869} 806}
870 807
871static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 808static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
872{ 809{
873 const struct ccid3_hc_rx_sock *hc; 810 info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
874
875 /* Listen socks doesn't have a private CCID block */
876 if (sk->sk_state == DCCP_LISTEN)
877 return;
878
879 hc = ccid3_hc_rx_sk(sk);
880 info->tcpi_ca_state = hc->rx_state;
881 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; 811 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
882 info->tcpi_rcv_rtt = hc->rx_rtt; 812 info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
883} 813}
884 814
885static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, 815static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
886 u32 __user *optval, int __user *optlen) 816 u32 __user *optval, int __user *optlen)
887{ 817{
888 const struct ccid3_hc_rx_sock *hc; 818 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
889 struct tfrc_rx_info rx_info; 819 struct tfrc_rx_info rx_info;
890 const void *val; 820 const void *val;
891 821
892 /* Listen socks doesn't have a private CCID block */
893 if (sk->sk_state == DCCP_LISTEN)
894 return -EINVAL;
895
896 hc = ccid3_hc_rx_sk(sk);
897 switch (optname) { 822 switch (optname) {
898 case DCCP_SOCKOPT_CCID_RX_INFO: 823 case DCCP_SOCKOPT_CCID_RX_INFO:
899 if (len < sizeof(rx_info)) 824 if (len < sizeof(rx_info))
900 return -EINVAL; 825 return -EINVAL;
901 rx_info.tfrcrx_x_recv = hc->rx_x_recv; 826 rx_info.tfrcrx_x_recv = hc->rx_x_recv;
902 rx_info.tfrcrx_rtt = hc->rx_rtt; 827 rx_info.tfrcrx_rtt = hc->rx_rtt;
903 rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U : 828 rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv);
904 scaled_div(1, hc->rx_pinv);
905 len = sizeof(rx_info); 829 len = sizeof(rx_info);
906 val = &rx_info; 830 val = &rx_info;
907 break; 831 break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 032635776653..1a9933c29672 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,35 +42,36 @@
42#include "lib/tfrc.h" 42#include "lib/tfrc.h"
43#include "../ccid.h" 43#include "../ccid.h"
44 44
45/* Two seconds as per RFC 3448 4.2 */ 45/* Two seconds as per RFC 5348, 4.2 */
46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
47 47
48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
49#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
50
51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 48/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
52#define TFRC_T_MBI 64 49#define TFRC_T_MBI 64
53 50
51/*
52 * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
53 * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
54 * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
55 * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
56 * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
57 */
58#if (HZ >= 500)
59# define TFRC_T_DELTA USEC_PER_MSEC
60#else
61# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
62#endif
63
54enum ccid3_options { 64enum ccid3_options {
55 TFRC_OPT_LOSS_EVENT_RATE = 192, 65 TFRC_OPT_LOSS_EVENT_RATE = 192,
56 TFRC_OPT_LOSS_INTERVALS = 193, 66 TFRC_OPT_LOSS_INTERVALS = 193,
57 TFRC_OPT_RECEIVE_RATE = 194, 67 TFRC_OPT_RECEIVE_RATE = 194,
58}; 68};
59 69
60struct ccid3_options_received {
61 u64 ccid3or_seqno:48,
62 ccid3or_loss_intervals_idx:16;
63 u16 ccid3or_loss_intervals_len;
64 u32 ccid3or_loss_event_rate;
65 u32 ccid3or_receive_rate;
66};
67
68/* TFRC sender states */ 70/* TFRC sender states */
69enum ccid3_hc_tx_states { 71enum ccid3_hc_tx_states {
70 TFRC_SSTATE_NO_SENT = 1, 72 TFRC_SSTATE_NO_SENT = 1,
71 TFRC_SSTATE_NO_FBACK, 73 TFRC_SSTATE_NO_FBACK,
72 TFRC_SSTATE_FBACK, 74 TFRC_SSTATE_FBACK,
73 TFRC_SSTATE_TERM,
74}; 75};
75 76
76/** 77/**
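The new TFRC_T_DELTA constant replaces the per-socket tx_delta field: with HZ >= 500 the timer resolution is fine enough for a fixed 1 ms, while coarser configurations fall back to half a timer tick. A standalone check of the arithmetic for a few HZ values:

	#include <stdio.h>

	#define USEC_PER_SEC	1000000UL
	#define USEC_PER_MSEC	1000UL

	/* t_delta per the comment above: 1 ms when HZ >= 500, else t_gran / 2 */
	static unsigned long tfrc_t_delta(unsigned long hz)
	{
		return hz >= 500 ? USEC_PER_MSEC : USEC_PER_SEC / (2 * hz);
	}

	int main(void)
	{
		unsigned long hz[] = { 100, 250, 500, 1000 };

		for (unsigned i = 0; i < sizeof(hz) / sizeof(hz[0]); i++)
			printf("HZ=%4lu  t_delta=%lu us\n", hz[i], tfrc_t_delta(hz[i]));
		/* HZ=100 -> 5000 us, HZ=250 -> 2000 us, HZ >= 500 -> 1000 us */
		return 0;
	}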
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states {
90 * @tx_no_feedback_timer: Handle to no feedback timer 91 * @tx_no_feedback_timer: Handle to no feedback timer
91 * @tx_t_ld: Time last doubled during slow start 92 * @tx_t_ld: Time last doubled during slow start
92 * @tx_t_nom: Nominal send time of next packet 93 * @tx_t_nom: Nominal send time of next packet
93 * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs
94 * @tx_hist: Packet history 94 * @tx_hist: Packet history
95 * @tx_options_received: Parsed set of retrieved options
96 */ 95 */
97struct ccid3_hc_tx_sock { 96struct ccid3_hc_tx_sock {
98 struct tfrc_tx_info tx_tfrc; 97 u64 tx_x;
99#define tx_x tx_tfrc.tfrctx_x 98 u64 tx_x_recv;
100#define tx_x_recv tx_tfrc.tfrctx_x_recv 99 u32 tx_x_calc;
101#define tx_x_calc tx_tfrc.tfrctx_x_calc 100 u32 tx_rtt;
102#define tx_rtt tx_tfrc.tfrctx_rtt 101 u32 tx_p;
103#define tx_p tx_tfrc.tfrctx_p 102 u32 tx_t_rto;
104#define tx_t_rto tx_tfrc.tfrctx_rto 103 u32 tx_t_ipi;
105#define tx_t_ipi tx_tfrc.tfrctx_ipi
106 u16 tx_s; 104 u16 tx_s;
107 enum ccid3_hc_tx_states tx_state:8; 105 enum ccid3_hc_tx_states tx_state:8;
108 u8 tx_last_win_count; 106 u8 tx_last_win_count;
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock {
110 struct timer_list tx_no_feedback_timer; 108 struct timer_list tx_no_feedback_timer;
111 ktime_t tx_t_ld; 109 ktime_t tx_t_ld;
112 ktime_t tx_t_nom; 110 ktime_t tx_t_nom;
113 u32 tx_delta;
114 struct tfrc_tx_hist_entry *tx_hist; 111 struct tfrc_tx_hist_entry *tx_hist;
115 struct ccid3_options_received tx_options_received;
116}; 112};
117 113
118static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) 114static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
126enum ccid3_hc_rx_states { 122enum ccid3_hc_rx_states {
127 TFRC_RSTATE_NO_DATA = 1, 123 TFRC_RSTATE_NO_DATA = 1,
128 TFRC_RSTATE_DATA, 124 TFRC_RSTATE_DATA,
129 TFRC_RSTATE_TERM = 127,
130}; 125};
131 126
132/** 127/**
133 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket 128 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
134 * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3)
135 * @rx_rtt: Receiver estimate of rtt (non-standard)
136 * @rx_p: Current loss event rate (RFC 3448 5.4)
137 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) 129 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
138 * @rx_state: Receiver state, one of %ccid3_hc_rx_states 130 * @rx_state: Receiver state, one of %ccid3_hc_rx_states
139 * @rx_bytes_recv: Total sum of DCCP payload bytes 131 * @rx_bytes_recv: Total sum of DCCP payload bytes
140 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) 132 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
141 * @rx_rtt: Receiver estimate of RTT 133 * @rx_rtt: Receiver estimate of RTT
142 * @rx_tstamp_last_feedback: Time at which last feedback was sent 134 * @rx_tstamp_last_feedback: Time at which last feedback was sent
143 * @rx_tstamp_last_ack: Time at which last feedback was sent
144 * @rx_hist: Packet history (loss detection + RTT sampling) 135 * @rx_hist: Packet history (loss detection + RTT sampling)
145 * @rx_li_hist: Loss Interval database 136 * @rx_li_hist: Loss Interval database
146 * @rx_s: Received packet size in bytes 137 * @rx_s: Received packet size in bytes
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8fc3cbf79071..497723c4d4bb 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
116 cur->li_length = len; 116 cur->li_length = len;
117 tfrc_lh_calc_i_mean(lh); 117 tfrc_lh_calc_i_mean(lh);
118 118
119 return (lh->i_mean < old_i_mean); 119 return lh->i_mean < old_i_mean;
120} 120}
121 121
122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ 122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 3a4f414e94a0..de8fe294bf0b 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -38,18 +38,6 @@
38#include "packet_history.h" 38#include "packet_history.h"
39#include "../../dccp.h" 39#include "../../dccp.h"
40 40
41/**
42 * tfrc_tx_hist_entry - Simple singly-linked TX history list
43 * @next: next oldest entry (LIFO order)
44 * @seqno: sequence number of this entry
45 * @stamp: send time of packet with sequence number @seqno
46 */
47struct tfrc_tx_hist_entry {
48 struct tfrc_tx_hist_entry *next;
49 u64 seqno;
50 ktime_t stamp;
51};
52
53/* 41/*
54 * Transmitter History Routines 42 * Transmitter History Routines
55 */ 43 */
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void)
71 } 59 }
72} 60}
73 61
74static struct tfrc_tx_hist_entry *
75 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
76{
77 while (head != NULL && head->seqno != seqno)
78 head = head->next;
79
80 return head;
81}
82
83int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) 62int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
84{ 63{
85 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); 64 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
107 *headp = NULL; 86 *headp = NULL;
108} 87}
109 88
110u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
111 const ktime_t now)
112{
113 u32 rtt = 0;
114 struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
115
116 if (packet != NULL) {
117 rtt = ktime_us_delta(now, packet->stamp);
118 /*
119 * Garbage-collect older (irrelevant) entries:
120 */
121 tfrc_tx_hist_purge(&packet->next);
122 }
123
124 return rtt;
125}
126
127
128/* 89/*
129 * Receiver History Routines 90 * Receiver History Routines
130 */ 91 */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7df6c5299999..7ee4a9d9d335 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include "tfrc.h" 41#include "tfrc.h"
42 42
43struct tfrc_tx_hist_entry; 43/**
44 * tfrc_tx_hist_entry - Simple singly-linked TX history list
45 * @next: next oldest entry (LIFO order)
46 * @seqno: sequence number of this entry
47 * @stamp: send time of packet with sequence number @seqno
48 */
49struct tfrc_tx_hist_entry {
50 struct tfrc_tx_hist_entry *next;
51 u64 seqno;
52 ktime_t stamp;
53};
54
55static inline struct tfrc_tx_hist_entry *
56 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
57{
58 while (head != NULL && head->seqno != seqno)
59 head = head->next;
60 return head;
61}
44 62
45extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); 63extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
46extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); 64extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
47extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
48 const u64 seqno, const ktime_t now);
49 65
50/* Subtraction a-b modulo-16, respects circular wrap-around */ 66/* Subtraction a-b modulo-16, respects circular wrap-around */
51#define SUB16(a, b) (((a) + 16 - (b)) & 0xF) 67#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
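Moving tfrc_tx_hist_entry and the linear lookup into this header lets callers walk the LIFO send history inline. A self-contained model of the same list discipline, including the purge-older garbage collection that the removed tfrc_tx_hist_rtt() performed after a hit (malloc stands in for the kmem_cache):

	#include <stdint.h>
	#include <stdlib.h>

	struct tx_hist_entry {		/* mirrors tfrc_tx_hist_entry: LIFO order */
		struct tx_hist_entry *next;	/* next oldest entry */
		uint64_t seqno;
	};

	static int tx_hist_add(struct tx_hist_entry **headp, uint64_t seqno)
	{
		struct tx_hist_entry *e = malloc(sizeof(*e));

		if (e == NULL)
			return -1;
		e->seqno = seqno;
		e->next = *headp;	/* newest entry sits at the head */
		*headp = e;
		return 0;
	}

	static struct tx_hist_entry *
	tx_hist_find(struct tx_hist_entry *head, uint64_t seqno)
	{
		while (head != NULL && head->seqno != seqno)
			head = head->next;
		return head;
	}

	static void tx_hist_purge(struct tx_hist_entry **headp)
	{
		struct tx_hist_entry *e = *headp, *next;

		for (; e != NULL; e = next) {
			next = e->next;
			free(e);
		}
		*headp = NULL;
	}

	int main(void)
	{
		struct tx_hist_entry *head = NULL;

		for (uint64_t s = 1; s <= 5; s++)
			tx_hist_add(&head, s);

		struct tx_hist_entry *hit = tx_hist_find(head, 3);
		if (hit != NULL)
			tx_hist_purge(&hit->next);	/* GC entries older than 3 */
		tx_hist_purge(&head);
		return 0;
	}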
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 01bb48e96c2e..f8ee3f549770 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
57 57
58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); 58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); 59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
60extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
60 61
61extern int tfrc_tx_packet_history_init(void); 62extern int tfrc_tx_packet_history_init(void);
62extern void tfrc_tx_packet_history_exit(void); 63extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 22ca1cf0eb55..a052a4377e26 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
687 index = tfrc_binsearch(fvalue, 0); 687 index = tfrc_binsearch(fvalue, 0);
688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; 688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
689} 689}
690
691/**
692 * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
693 * When @loss_event_rate is large, there is a chance that p is truncated to 0.
694 * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
695 */
696u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
697{
698 if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
699 return 0;
700 if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
701 return 1000000;
702 return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
703}
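The helper folds three cases into one place: UINT_MAX means no loss has been observed (RFC 4342, 8.5, so p = 0), a zero rate maps to 100%, and otherwise p = 1/rate in the 10^6 fixpoint, floored so truncation cannot report a lossy connection as loss-free. A userspace restatement of the arithmetic; the SMALLEST_P floor used here is an illustrative stand-in for TFRC_SMALLEST_P:

	#include <stdint.h>
	#include <stdio.h>
	#include <limits.h>

	/* 10^6 fixpoint: 1000000 represents a loss event rate of 100% */
	#define FIXPOINT_ONE	1000000U
	#define SMALLEST_P	100U	/* illustrative floor, not the kernel value */

	static uint32_t scaled_div_1(uint32_t b)	/* scaled_div(1, b) */
	{
		return FIXPOINT_ONE / b;
	}

	static uint32_t invert_loss_event_rate(uint32_t loss_event_rate)
	{
		if (loss_event_rate == UINT_MAX)	/* no loss: RFC 4342, 8.5 */
			return 0;
		if (loss_event_rate == 0)		/* map 1/0 into 100% */
			return FIXPOINT_ONE;
		uint32_t p = scaled_div_1(loss_event_rate);
		return p < SMALLEST_P ? SMALLEST_P : p;	/* avoid truncation to 0 */
	}

	int main(void)
	{
		printf("%u\n", invert_loss_event_rate(UINT_MAX));	/* 0       */
		printf("%u\n", invert_loss_event_rate(0));		/* 1000000 */
		printf("%u\n", invert_loss_event_rate(100));		/* 10000   */
		printf("%u\n", invert_loss_event_rate(50000000));	/* floored */
		return 0;
	}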
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3ccef1b70fee..a8ed459508b2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -153,18 +153,27 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
153} 153}
154 154
155/** 155/**
156 * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1 156 * dccp_loss_count - Approximate the number of lost data packets in a burst loss
157 * @s1: start sequence number 157 * @s1: last known sequence number before the loss ('hole')
158 * @s2: end sequence number 158 * @s2: first sequence number seen after the 'hole'
159 * @ndp: NDP count on packet with sequence number @s2 159 * @ndp: NDP count on packet with sequence number @s2
160 * Returns true if the sequence range s1...s2 has no data loss.
161 */ 160 */
162static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) 161static inline u64 dccp_loss_count(const u64 s1, const u64 s2, const u64 ndp)
163{ 162{
164 s64 delta = dccp_delta_seqno(s1, s2); 163 s64 delta = dccp_delta_seqno(s1, s2);
165 164
166 WARN_ON(delta < 0); 165 WARN_ON(delta < 0);
167 return (u64)delta <= ndp + 1; 166 delta -= ndp + 1;
167
168 return delta > 0 ? delta : 0;
169}
170
171/**
172 * dccp_loss_free - Evaluate condition for data loss from RFC 4340, 7.7.1
173 */
174static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
175{
176 return dccp_loss_count(s1, s2, ndp) == 0;
168} 177}
169 178
170enum { 179enum {
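dccp_loss_count() generalises the old boolean: of the delta - 1 sequence numbers strictly between the hole boundaries, ndp were used by non-data packets, so delta - (ndp + 1) approximates the number of lost data packets, and dccp_loss_free() becomes the zero test. A standalone check, with the 48-bit modular delta reduced to plain subtraction for in-window values:

	#include <stdint.h>
	#include <stdio.h>

	/* dccp_delta_seqno() works modulo 2^48; plain subtraction is enough
	 * here because the example values sit well inside one window. */
	static int64_t delta_seqno(uint64_t s1, uint64_t s2)
	{
		return (int64_t)(s2 - s1);
	}

	static uint64_t loss_count(uint64_t s1, uint64_t s2, uint64_t ndp)
	{
		int64_t delta = delta_seqno(s1, s2) - (int64_t)(ndp + 1);

		return delta > 0 ? (uint64_t)delta : 0;
	}

	static int loss_free(uint64_t s1, uint64_t s2, uint64_t ndp)
	{
		return loss_count(s1, s2, ndp) == 0;
	}

	int main(void)
	{
		/* consecutive seqnos, no NDP: nothing missing */
		printf("%d\n", loss_free(10, 11, 0));			/* 1 */
		/* three seqnos in the gap, one non-data: 2 data packets lost */
		printf("%llu\n", (unsigned long long)loss_count(10, 14, 1));
		return 0;
	}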
@@ -234,8 +243,9 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
234extern void dccp_send_sync(struct sock *sk, const u64 seq, 243extern void dccp_send_sync(struct sock *sk, const u64 seq,
235 const enum dccp_pkt_type pkt_type); 244 const enum dccp_pkt_type pkt_type);
236 245
237extern void dccp_write_xmit(struct sock *sk, int block); 246extern void dccp_write_xmit(struct sock *sk);
238extern void dccp_write_space(struct sock *sk); 247extern void dccp_write_space(struct sock *sk);
248extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
239 249
240extern void dccp_init_xmit_timers(struct sock *sk); 250extern void dccp_init_xmit_timers(struct sock *sk);
241static inline void dccp_clear_xmit_timers(struct sock *sk) 251static inline void dccp_clear_xmit_timers(struct sock *sk)
@@ -246,7 +256,6 @@ static inline void dccp_clear_xmit_timers(struct sock *sk)
246extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); 256extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
247 257
248extern const char *dccp_packet_name(const int type); 258extern const char *dccp_packet_name(const int type);
249extern const char *dccp_state_name(const int state);
250 259
251extern void dccp_set_state(struct sock *sk, const int state); 260extern void dccp_set_state(struct sock *sk, const int state);
252extern void dccp_done(struct sock *sk); 261extern void dccp_done(struct sock *sk);
@@ -415,6 +424,23 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq)
415 dp->dccps_gsr = seq; 424 dp->dccps_gsr = seq;
416 /* Sequence validity window depends on remote Sequence Window (7.5.1) */ 425 /* Sequence validity window depends on remote Sequence Window (7.5.1) */
417 dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); 426 dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
427 /*
428 * Adjust SWL so that it is not below ISR. In contrast to RFC 4340,
429 * 7.5.1 we perform this check beyond the initial handshake: W/W' are
430 * always > 32, so for the first W/W' packets in the lifetime of a
431 * connection we always have to adjust SWL.
432 * A second reason why we are doing this is that the window depends on
433 * the feature-remote value of Sequence Window: nothing stops the peer
434 * from updating this value while we are busy adjusting SWL for the
435 * first W packets (we would have to count from scratch again then).
436 * Therefore it is safer to always make sure that the Sequence Window
437 * is not artificially extended by a peer who grows SWL downwards by
438 * continually updating the feature-remote Sequence-Window.
439 * If sequence numbers wrap it is bad luck. But that will take a while
440 * (48 bit), and this measure prevents Sequence-number attacks.
441 */
442 if (before48(dp->dccps_swl, dp->dccps_isr))
443 dp->dccps_swl = dp->dccps_isr;
418 dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); 444 dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
419} 445}
420 446
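The clamp now runs on every GSR update rather than only during the handshake, for the two reasons spelled out in the comment. A toy model of the window update with 48-bit modular helpers; add48/sub48/before48 mirror the semantics of the kernel macros but are reimplemented here:

	#include <stdint.h>
	#include <stdio.h>

	#define SEQ48_MASK	0xFFFFFFFFFFFFULL

	static uint64_t add48(uint64_t a, uint64_t b) { return (a + b) & SEQ48_MASK; }
	static uint64_t sub48(uint64_t a, uint64_t b) { return (a - b) & SEQ48_MASK; }

	/* "a is before b" in circular 48-bit space: shifting into the top bits
	 * lets the signed subtraction see the wrap-around correctly */
	static int before48(uint64_t a, uint64_t b)
	{
		return (int64_t)((b << 16) - (a << 16)) > 0;
	}

	/* Sequence-window update on receiving seqno 'gsr' (RFC 4340, 7.5.1),
	 * with the SWL-not-below-ISR clamp applied on every update. */
	static void update_gsr(uint64_t gsr, uint64_t isr, uint64_t seq_win,
			       uint64_t *swl, uint64_t *swh)
	{
		*swl = sub48(add48(gsr, 1), seq_win / 4);
		if (before48(*swl, isr))
			*swl = isr;
		*swh = add48(gsr, (3 * seq_win) / 4);
	}

	int main(void)
	{
		uint64_t swl, swh;

		/* early in a connection GSR is near ISR, so SWL gets clamped */
		update_gsr(105, 100, 100, &swl, &swh);
		printf("SWL=%llu SWH=%llu\n",
		       (unsigned long long)swl, (unsigned long long)swh);
		/* SWL=100 (clamped up from 81), SWH=180 */
		return 0;
	}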
@@ -425,14 +451,16 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
425 dp->dccps_gss = seq; 451 dp->dccps_gss = seq;
426 /* Ack validity window depends on local Sequence Window value (7.5.1) */ 452 /* Ack validity window depends on local Sequence Window value (7.5.1) */
427 dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); 453 dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
454 /* Adjust AWL so that it is not below ISS - see comment above for SWL */
455 if (before48(dp->dccps_awl, dp->dccps_iss))
456 dp->dccps_awl = dp->dccps_iss;
428 dp->dccps_awh = dp->dccps_gss; 457 dp->dccps_awh = dp->dccps_gss;
429} 458}
430 459
431static inline int dccp_ack_pending(const struct sock *sk) 460static inline int dccp_ack_pending(const struct sock *sk)
432{ 461{
433 const struct dccp_sock *dp = dccp_sk(sk); 462 const struct dccp_sock *dp = dccp_sk(sk);
434 return dp->dccps_timestamp_echo != 0 || 463 return (dp->dccps_hc_rx_ackvec != NULL &&
435 (dp->dccps_hc_rx_ackvec != NULL &&
436 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || 464 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
437 inet_csk_ack_scheduled(sk); 465 inet_csk_ack_scheduled(sk);
438} 466}
@@ -449,7 +477,6 @@ extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
449extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); 477extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
450extern u32 dccp_timestamp(void); 478extern u32 dccp_timestamp(void);
451extern void dccp_timestamping_init(void); 479extern void dccp_timestamping_init(void);
452extern int dccp_insert_option_timestamp(struct sk_buff *skb);
453extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, 480extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
454 const void *value, unsigned char len); 481 const void *value, unsigned char len);
455 482
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index df7dd26cf07e..568def952722 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -730,16 +730,6 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
730 0, list, len); 730 0, list, len);
731} 731}
732 732
733/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
734int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
735{
736 /* any changes must be registered before establishing the connection */
737 if (sk->sk_state != DCCP_CLOSED)
738 return -EISCONN;
739 if (dccp_feat_type(feat) != FEAT_NN)
740 return -EINVAL;
741 return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
742}
743 733
744/* 734/*
745 * Tracking features whose value depend on the choice of CCID 735 * Tracking features whose value depend on the choice of CCID
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index f96721619def..e56a4e5e634e 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -111,7 +111,6 @@ extern int dccp_feat_init(struct sock *sk);
111extern void dccp_feat_initialise_sysctls(void); 111extern void dccp_feat_initialise_sysctls(void);
112extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 112extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
113 u8 const *list, u8 len); 113 u8 const *list, u8 len);
114extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
115extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, 114extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
116 u8 mand, u8 opt, u8 feat, u8 *val, u8 len); 115 u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
117extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); 116extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 10c957a88f4f..265985370fa1 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -259,7 +259,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
259 sysctl_dccp_sync_ratelimit))) 259 sysctl_dccp_sync_ratelimit)))
260 return 0; 260 return 0;
261 261
262 DCCP_WARN("DCCP: Step 6 failed for %s packet, " 262 DCCP_WARN("Step 6 failed for %s packet, "
263 "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " 263 "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
264 "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " 264 "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
265 "sending SYNC...\n", dccp_packet_name(dh->dccph_type), 265 "sending SYNC...\n", dccp_packet_name(dh->dccph_type),
@@ -441,20 +441,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
441 kfree_skb(sk->sk_send_head); 441 kfree_skb(sk->sk_send_head);
442 sk->sk_send_head = NULL; 442 sk->sk_send_head = NULL;
443 443
444 dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
445 dccp_update_gsr(sk, dp->dccps_isr);
446 /* 444 /*
447 * SWL and AWL are initially adjusted so that they are not less than 445 * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect
448 * the initial Sequence Numbers received and sent, respectively: 446 * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH
449 * SWL := max(GSR + 1 - floor(W/4), ISR), 447 * is done as part of activating the feature values below, since
450 * AWL := max(GSS - W' + 1, ISS). 448 * these settings depend on the local/remote Sequence Window
451 * These adjustments MUST be applied only at the beginning of the 449 * features, which were undefined or not confirmed until now.
452 * connection.
453 *
454 * AWL was adjusted in dccp_v4_connect -acme
455 */ 450 */
456 dccp_set_seqno(&dp->dccps_swl, 451 dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
457 max48(dp->dccps_swl, dp->dccps_isr));
458 452
459 dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); 453 dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
460 454
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d4a166f0f391..3f69ea114829 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
392 392
393 newsk = dccp_create_openreq_child(sk, req, skb); 393 newsk = dccp_create_openreq_child(sk, req, skb);
394 if (newsk == NULL) 394 if (newsk == NULL)
395 goto exit; 395 goto exit_nonewsk;
396 396
397 sk_setup_caps(newsk, dst); 397 sk_setup_caps(newsk, dst);
398 398
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
409 409
410 dccp_sync_mss(newsk, dst_mtu(dst)); 410 dccp_sync_mss(newsk, dst_mtu(dst));
411 411
412 if (__inet_inherit_port(sk, newsk) < 0) {
413 sock_put(newsk);
414 goto exit;
415 }
412 __inet_hash_nolisten(newsk, NULL); 416 __inet_hash_nolisten(newsk, NULL);
413 __inet_inherit_port(sk, newsk);
414 417
415 return newsk; 418 return newsk;
416 419
417exit_overflow: 420exit_overflow:
418 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 421 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
422exit_nonewsk:
423 dst_release(dst);
419exit: 424exit:
420 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 425 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
421 dst_release(dst);
422 return NULL; 426 return NULL;
423} 427}
424 428
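The reordering makes port inheritance a fallible step taken before the child socket is hashed and becomes globally visible, and it splits the error path so the route reference is dropped exactly once: after sk_setup_caps() the child owns dst and sock_put() releases it, while the no-child path releases dst directly. A compact sketch of that ownership-aware unwind, with stubs standing in for the inet helpers:

	#include <stdlib.h>

	struct sk  { void *dst; };
	struct dst { int refs; };

	static struct sk *create_child(void)                { return calloc(1, sizeof(struct sk)); }
	static void setup_caps(struct sk *c, struct dst *d) { c->dst = d; }	/* child takes the ref */
	static int  inherit_port(struct sk *c)              { (void)c; return 0; }	/* may fail: < 0 */
	static void hash_child(struct sk *c)                { (void)c; }
	static void sock_put(struct sk *c)                  { free(c); }	/* also drops c->dst */
	static void dst_release(struct dst *d)              { (void)d; }

	static struct sk *accept_child(struct dst *dst)
	{
		struct sk *child = create_child();

		if (child == NULL)
			goto exit_nonewsk;

		setup_caps(child, dst);	/* from here the child owns the dst reference */

		if (inherit_port(child) < 0) {
			sock_put(child);	/* releases the dst with the child */
			return NULL;
		}
		hash_child(child);	/* only visible once the port is secured */
		return child;

	exit_nonewsk:
		dst_release(dst);	/* no child ever took ownership of the route */
		return NULL;
	}

	int main(void)
	{
		struct dst d = { 1 };
		struct sk *c = accept_child(&d);

		if (c != NULL)
			sock_put(c);
		return 0;
	}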
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e3f32575df7..dca711df9b60 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
564 564
565 newsk = dccp_create_openreq_child(sk, req, skb); 565 newsk = dccp_create_openreq_child(sk, req, skb);
566 if (newsk == NULL) 566 if (newsk == NULL)
567 goto out; 567 goto out_nonewsk;
568 568
569 /* 569 /*
570 * No need to charge this sock to the relevant IPv6 refcnt debug socks 570 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
634 634
635 if (__inet_inherit_port(sk, newsk) < 0) {
636 sock_put(newsk);
637 goto out;
638 }
635 __inet6_hash(newsk, NULL); 639 __inet6_hash(newsk, NULL);
636 __inet_inherit_port(sk, newsk);
637 640
638 return newsk; 641 return newsk;
639 642
640out_overflow: 643out_overflow:
641 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 644 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
645out_nonewsk:
646 dst_release(dst);
642out: 647out:
643 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 648 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
644 if (opt != NULL && opt != np->opt) 649 if (opt != NULL && opt != np->opt)
645 sock_kfree_s(sk, opt, opt->tot_len); 650 sock_kfree_s(sk, opt, opt->tot_len);
646 dst_release(dst);
647 return NULL; 651 return NULL;
648} 652}
649 653
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 128b089d3aef..d7041a0963af 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -121,30 +121,18 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
121 * 121 *
122 * Choose S.ISS (initial seqno) or set from Init Cookies 122 * Choose S.ISS (initial seqno) or set from Init Cookies
123 * Initialize S.GAR := S.ISS 123 * Initialize S.GAR := S.ISS
124 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies 124 * Set S.ISR, S.GSR from packet (or Init Cookies)
125 */ 125 *
126 newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; 126 * Setting AWL/AWH and SWL/SWH happens as part of the feature
127 dccp_update_gss(newsk, dreq->dreq_iss); 127 * activation below, as these windows all depend on the local
128 128 * and remote Sequence Window feature values (7.5.2).
129 newdp->dccps_isr = dreq->dreq_isr;
130 dccp_update_gsr(newsk, dreq->dreq_isr);
131
132 /*
133 * SWL and AWL are initially adjusted so that they are not less than
134 * the initial Sequence Numbers received and sent, respectively:
135 * SWL := max(GSR + 1 - floor(W/4), ISR),
136 * AWL := max(GSS - W' + 1, ISS).
137 * These adjustments MUST be applied only at the beginning of the
138 * connection.
139 */ 129 */
140 dccp_set_seqno(&newdp->dccps_swl, 130 newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss;
141 max48(newdp->dccps_swl, newdp->dccps_isr)); 131 newdp->dccps_gar = newdp->dccps_iss;
142 dccp_set_seqno(&newdp->dccps_awl, 132 newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr;
143 max48(newdp->dccps_awl, newdp->dccps_iss));
144 133
145 /* 134 /*
146 * Activate features after initialising the sequence numbers, 135 * Activate features: initialise CCIDs, sequence windows etc.
147 * since CCID initialisation may depend on GSS, ISR, ISS etc.
148 */ 136 */
149 if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { 137 if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
150 /* It is still raw copy of parent, so invalidate 138 /* It is still raw copy of parent, so invalidate
diff --git a/net/dccp/options.c b/net/dccp/options.c
index bfda087bd90d..cd3061813009 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
96 } 96 }
97 97
98 /* 98 /*
99 * CCID-Specific Options (from RFC 4340, sec. 10.3):
100 *
101 * Option numbers 128 through 191 are for options sent from the
102 * HC-Sender to the HC-Receiver; option numbers 192 through 255
103 * are for options sent from the HC-Receiver to the HC-Sender.
104 *
105 * CCID-specific options are ignored during connection setup, as 99 * CCID-specific options are ignored during connection setup, as
106 * negotiation may still be in progress (see RFC 4340, 10.3). 100 * negotiation may still be in progress (see RFC 4340, 10.3).
107 * The same applies to Ack Vectors, as these depend on the CCID. 101 * The same applies to Ack Vectors, as these depend on the CCID.
108 *
109 */ 102 */
110 if (dreq != NULL && (opt >= 128 || 103 if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
111 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) 104 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
112 goto ignore_option; 105 goto ignore_option;
113 106
@@ -170,6 +163,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
170 dccp_role(sk), ntohl(opt_val), 163 dccp_role(sk), ntohl(opt_val),
171 (unsigned long long) 164 (unsigned long long)
172 DCCP_SKB_CB(skb)->dccpd_ack_seq); 165 DCCP_SKB_CB(skb)->dccpd_ack_seq);
166 /* schedule an Ack in case this sender is quiescent */
167 inet_csk_schedule_ack(sk);
173 break; 168 break;
174 case DCCPO_TIMESTAMP_ECHO: 169 case DCCPO_TIMESTAMP_ECHO:
175 if (len != 4 && len != 6 && len != 8) 170 if (len != 4 && len != 6 && len != 8)
@@ -226,23 +221,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
226 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", 221 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
227 dccp_role(sk), elapsed_time); 222 dccp_role(sk), elapsed_time);
228 break; 223 break;
229 case 128 ... 191: { 224 case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
230 const u16 idx = value - options;
231
232 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, 225 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
233 opt, len, idx, 226 pkt_type, opt, value, len))
234 value) != 0)
235 goto out_invalid_option; 227 goto out_invalid_option;
236 }
237 break; 228 break;
238 case 192 ... 255: { 229 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
239 const u16 idx = value - options;
240
241 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, 230 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
242 opt, len, idx, 231 pkt_type, opt, value, len))
243 value) != 0)
244 goto out_invalid_option; 232 goto out_invalid_option;
245 }
246 break; 233 break;
247 default: 234 default:
248 DCCP_CRIT("DCCP(%p): option %d(len=%d) not " 235 DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
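Replacing the literal 128 ... 191 and 192 ... 255 ranges with the DCCPO_{MIN,MAX}_{RX,TX}_CCID_SPECIFIC names keeps the RFC 4340, 10.3 split (sender-to-receiver versus receiver-to-sender option numbers) in one header instead of as magic numbers at the call site. A small standalone dispatcher in the same shape; the enum values follow directly from the RFC ranges:

	#include <stdio.h>

	/* RFC 4340, 10.3: 128..191 are parsed by the local HC-receiver,
	 * 192..255 by the local HC-sender. */
	enum {
		OPT_MIN_RX_CCID_SPECIFIC = 128,
		OPT_MAX_RX_CCID_SPECIFIC = 191,
		OPT_MIN_TX_CCID_SPECIFIC = 192,
		OPT_MAX_TX_CCID_SPECIFIC = 255,
	};

	static const char *dispatch(unsigned opt)
	{
		switch (opt) {	/* case ranges: GCC/Clang extension, as in the kernel */
		case OPT_MIN_RX_CCID_SPECIFIC ... OPT_MAX_RX_CCID_SPECIFIC:
			return "rx-ccid";
		case OPT_MIN_TX_CCID_SPECIFIC ... OPT_MAX_TX_CCID_SPECIFIC:
			return "tx-ccid";
		default:
			return "generic";
		}
	}

	int main(void)
	{
		printf("%s %s %s\n", dispatch(130), dispatch(194), dispatch(41));
		/* rx-ccid tx-ccid generic */
		return 0;
	}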
@@ -384,7 +371,7 @@ int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
384 371
385EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); 372EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
386 373
387int dccp_insert_option_timestamp(struct sk_buff *skb) 374static int dccp_insert_option_timestamp(struct sk_buff *skb)
388{ 375{
389 __be32 now = htonl(dccp_timestamp()); 376 __be32 now = htonl(dccp_timestamp());
390 /* yes this will overflow but that is the point as we want a 377 /* yes this will overflow but that is the point as we want a
@@ -393,8 +380,6 @@ int dccp_insert_option_timestamp(struct sk_buff *skb)
393 return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); 380 return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now));
394} 381}
395 382
396EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
397
398static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, 383static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
399 struct dccp_request_sock *dreq, 384 struct dccp_request_sock *dreq,
400 struct sk_buff *skb) 385 struct sk_buff *skb)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index aadbdb58758b..45b91853f5ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -209,108 +209,150 @@ void dccp_write_space(struct sock *sk)
209} 209}
210 210
211/** 211/**
212 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet 212 * dccp_wait_for_ccid - Await CCID send permission
213 * @sk: socket to wait for 213 * @sk: socket to wait for
214 * @skb: current skb to pass on for waiting 214 * @delay: timeout in jiffies
215 * @delay: sleep timeout in milliseconds (> 0) 215 * This is used by CCIDs which need to delay the send time in process context.
216 * This function is called by default when the socket is closed, and
217 * when a non-zero linger time is set on the socket. For consistency
218 */ 216 */
219static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) 217static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
220{ 218{
221 struct dccp_sock *dp = dccp_sk(sk);
222 DEFINE_WAIT(wait); 219 DEFINE_WAIT(wait);
223 unsigned long jiffdelay; 220 long remaining;
224 int rc; 221
222 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
223 sk->sk_write_pending++;
224 release_sock(sk);
225
226 remaining = schedule_timeout(delay);
227
228 lock_sock(sk);
229 sk->sk_write_pending--;
230 finish_wait(sk_sleep(sk), &wait);
231
232 if (signal_pending(current) || sk->sk_err)
233 return -1;
234 return remaining;
235}
236
237/**
238 * dccp_xmit_packet - Send data packet under control of CCID
239 * Transmits next-queued payload and informs CCID to account for the packet.
240 */
241static void dccp_xmit_packet(struct sock *sk)
242{
243 int err, len;
244 struct dccp_sock *dp = dccp_sk(sk);
245 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
246
247 if (unlikely(skb == NULL))
248 return;
249 len = skb->len;
225 250
226 do { 251 if (sk->sk_state == DCCP_PARTOPEN) {
227 dccp_pr_debug("delayed send by %d msec\n", delay); 252 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
228 jiffdelay = msecs_to_jiffies(delay); 253 /*
254 * See 8.1.5 - Handshake Completion.
255 *
256 * For robustness we resend Confirm options until the client has
257 * entered OPEN. During the initial feature negotiation, the MPS
258 * is smaller than usual, reduced by the Change/Confirm options.
259 */
260 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
261 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
262 dccp_send_ack(sk);
263 dccp_feat_list_purge(&dp->dccps_featneg);
264 }
229 265
230 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 266 inet_csk_schedule_ack(sk);
267 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
268 inet_csk(sk)->icsk_rto,
269 DCCP_RTO_MAX);
270 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
271 } else if (dccp_ack_pending(sk)) {
272 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
273 } else {
274 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
275 }
231 276
232 sk->sk_write_pending++; 277 err = dccp_transmit_skb(sk, skb);
233 release_sock(sk); 278 if (err)
234 schedule_timeout(jiffdelay); 279 dccp_pr_debug("transmit_skb() returned err=%d\n", err);
235 lock_sock(sk); 280 /*
236 sk->sk_write_pending--; 281 * Register this one as sent even if an error occurred. To the remote
282 * end a local packet drop is indistinguishable from network loss, i.e.
283 * any local drop will eventually be reported via receiver feedback.
284 */
285 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
286}
237 287
238 if (sk->sk_err) 288/**
239 goto do_error; 289 * dccp_flush_write_queue - Drain queue at end of connection
240 if (signal_pending(current)) 290 * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
241 goto do_interrupted; 291 * happen that the TX queue is not empty at the end of a connection. We give the
292 * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
293 * returns with a non-empty write queue, it will be purged later.
294 */
295void dccp_flush_write_queue(struct sock *sk, long *time_budget)
296{
297 struct dccp_sock *dp = dccp_sk(sk);
298 struct sk_buff *skb;
299 long delay, rc;
242 300
301 while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
243 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 302 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
244 } while ((delay = rc) > 0); 303
245out: 304 switch (ccid_packet_dequeue_eval(rc)) {
246 finish_wait(sk_sleep(sk), &wait); 305 case CCID_PACKET_WILL_DEQUEUE_LATER:
247 return rc; 306 /*
248 307 * If the CCID determines when to send, the next sending
249do_error: 308 * time is unknown or the CCID may not even send again
250 rc = -EPIPE; 309 * (e.g. remote host crashes or lost Ack packets).
251 goto out; 310 */
252do_interrupted: 311 DCCP_WARN("CCID did not manage to send all packets\n");
253 rc = -EINTR; 312 return;
254 goto out; 313 case CCID_PACKET_DELAY:
314 delay = msecs_to_jiffies(rc);
315 if (delay > *time_budget)
316 return;
317 rc = dccp_wait_for_ccid(sk, delay);
318 if (rc < 0)
319 return;
320 *time_budget -= (delay - rc);
321 /* check again if we can send now */
322 break;
323 case CCID_PACKET_SEND_AT_ONCE:
324 dccp_xmit_packet(sk);
325 break;
326 case CCID_PACKET_ERR:
327 skb_dequeue(&sk->sk_write_queue);
328 kfree_skb(skb);
329 dccp_pr_debug("packet discarded due to err=%ld\n", rc);
330 }
331 }
255} 332}
256 333
257void dccp_write_xmit(struct sock *sk, int block) 334void dccp_write_xmit(struct sock *sk)
258{ 335{
259 struct dccp_sock *dp = dccp_sk(sk); 336 struct dccp_sock *dp = dccp_sk(sk);
260 struct sk_buff *skb; 337 struct sk_buff *skb;
261 338
262 while ((skb = skb_peek(&sk->sk_write_queue))) { 339 while ((skb = skb_peek(&sk->sk_write_queue))) {
263 int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 340 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
264
265 if (err > 0) {
266 if (!block) {
267 sk_reset_timer(sk, &dp->dccps_xmit_timer,
268 msecs_to_jiffies(err)+jiffies);
269 break;
270 } else
271 err = dccp_wait_for_ccid(sk, skb, err);
272 if (err && err != -EINTR)
273 DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
274 }
275 341
276 skb_dequeue(&sk->sk_write_queue); 342 switch (ccid_packet_dequeue_eval(rc)) {
277 if (err == 0) { 343 case CCID_PACKET_WILL_DEQUEUE_LATER:
278 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 344 return;
279 const int len = skb->len; 345 case CCID_PACKET_DELAY:
280 346 sk_reset_timer(sk, &dp->dccps_xmit_timer,
281 if (sk->sk_state == DCCP_PARTOPEN) { 347 jiffies + msecs_to_jiffies(rc));
282 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; 348 return;
283 /* 349 case CCID_PACKET_SEND_AT_ONCE:
284 * See 8.1.5 - Handshake Completion. 350 dccp_xmit_packet(sk);
285 * 351 break;
286 * For robustness we resend Confirm options until the client has 352 case CCID_PACKET_ERR:
287 * entered OPEN. During the initial feature negotiation, the MPS 353 skb_dequeue(&sk->sk_write_queue);
288 * is smaller than usual, reduced by the Change/Confirm options.
289 */
290 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
291 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
292 dccp_send_ack(sk);
293 dccp_feat_list_purge(&dp->dccps_featneg);
294 }
295
296 inet_csk_schedule_ack(sk);
297 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
298 inet_csk(sk)->icsk_rto,
299 DCCP_RTO_MAX);
300 dcb->dccpd_type = DCCP_PKT_DATAACK;
301 } else if (dccp_ack_pending(sk))
302 dcb->dccpd_type = DCCP_PKT_DATAACK;
303 else
304 dcb->dccpd_type = DCCP_PKT_DATA;
305
306 err = dccp_transmit_skb(sk, skb);
307 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
308 if (err)
309 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
310 err);
311 } else {
312 dccp_pr_debug("packet discarded due to err=%d\n", err);
313 kfree_skb(skb); 354 kfree_skb(skb);
355 dccp_pr_debug("packet discarded due to err=%d\n", rc);
314 } 356 }
315 } 357 }
316} 358}
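Both dccp_write_xmit() and dccp_flush_write_queue() now funnel the CCID return code through ccid_packet_dequeue_eval() into four outcomes: send at once, delay (rc still carries the wait in milliseconds, which is why the DELAY branches call msecs_to_jiffies(rc)), dequeue later under CCID control, or error. The classifier itself is not part of this hunk; the following is a plausible reconstruction consistent with the call sites, and the constant values and the delay cut-off are assumptions of this sketch:

	#include <stdio.h>

	/* Plausible shape of the classifier. The call sites rely only on:
	 * rc < 0 is an error, rc == 0 means send now, a small positive rc is
	 * a delay in milliseconds, anything else is a symbolic decision. */
	enum ccid_dequeueing_decision {
		CCID_PACKET_SEND_AT_ONCE       = 0x00000,
		CCID_PACKET_DELAY              = 0x10000,
		CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000,
		CCID_PACKET_ERR                = 0xF0000,
	};

	#define CCID_PACKET_DELAY_MAX	0xFFFF	/* assumed cap on ms delays */

	static int ccid_packet_dequeue_eval(const int return_code)
	{
		if (return_code < 0)
			return CCID_PACKET_ERR;
		if (return_code == 0)
			return CCID_PACKET_SEND_AT_ONCE;
		if (return_code <= CCID_PACKET_DELAY_MAX)
			return CCID_PACKET_DELAY;
		return return_code;	/* WILL_DEQUEUE_LATER passes through */
	}

	int main(void)
	{
		printf("%#x %#x %#x\n",
		       ccid_packet_dequeue_eval(-22),	/* -> CCID_PACKET_ERR   */
		       ccid_packet_dequeue_eval(250),	/* -> CCID_PACKET_DELAY */
		       ccid_packet_dequeue_eval(CCID_PACKET_WILL_DEQUEUE_LATER));
		return 0;
	}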
@@ -474,8 +516,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
474/* 516/*
475 * Do all connect socket setups that can be done AF independent. 517 * Do all connect socket setups that can be done AF independent.
476 */ 518 */
477static inline void dccp_connect_init(struct sock *sk) 519int dccp_connect(struct sock *sk)
478{ 520{
521 struct sk_buff *skb;
479 struct dccp_sock *dp = dccp_sk(sk); 522 struct dccp_sock *dp = dccp_sk(sk);
480 struct dst_entry *dst = __sk_dst_get(sk); 523 struct dst_entry *dst = __sk_dst_get(sk);
481 struct inet_connection_sock *icsk = inet_csk(sk); 524 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -485,22 +528,12 @@ static inline void dccp_connect_init(struct sock *sk)
485 528
486 dccp_sync_mss(sk, dst_mtu(dst)); 529 dccp_sync_mss(sk, dst_mtu(dst));
487 530
488 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
489 dp->dccps_gar = dp->dccps_iss;
490
491 icsk->icsk_retransmits = 0;
492}
493
494int dccp_connect(struct sock *sk)
495{
496 struct sk_buff *skb;
497 struct inet_connection_sock *icsk = inet_csk(sk);
498
499 /* do not connect if feature negotiation setup fails */ 531 /* do not connect if feature negotiation setup fails */
500 if (dccp_feat_finalise_settings(dccp_sk(sk))) 532 if (dccp_feat_finalise_settings(dccp_sk(sk)))
501 return -EPROTO; 533 return -EPROTO;
502 534
503 dccp_connect_init(sk); 535 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
536 dp->dccps_gar = dp->dccps_iss;
504 537
505 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); 538 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
506 if (unlikely(skb == NULL)) 539 if (unlikely(skb == NULL))
@@ -516,6 +549,7 @@ int dccp_connect(struct sock *sk)
516 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); 549 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
517 550
518 /* Timer for repeating the REQUEST until an answer. */ 551 /* Timer for repeating the REQUEST until an answer. */
552 icsk->icsk_retransmits = 0;
519 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 553 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
520 icsk->icsk_rto, DCCP_RTO_MAX); 554 icsk->icsk_rto, DCCP_RTO_MAX);
521 return 0; 555 return 0;
@@ -630,7 +664,6 @@ void dccp_send_close(struct sock *sk, const int active)
630 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; 664 DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
631 665
632 if (active) { 666 if (active) {
633 dccp_write_xmit(sk, 1);
634 dccp_skb_entail(sk, skb); 667 dccp_skb_entail(sk, skb);
635 dccp_transmit_skb(sk, skb_clone(skb, prio)); 668 dccp_transmit_skb(sk, skb_clone(skb, prio));
636 /* 669 /*
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 078e48d442fd..33d0e6297c21 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -149,6 +149,7 @@ static const struct file_operations dccpprobe_fops = {
149 .owner = THIS_MODULE, 149 .owner = THIS_MODULE,
150 .open = dccpprobe_open, 150 .open = dccpprobe_open,
151 .read = dccpprobe_read, 151 .read = dccpprobe_read,
152 .llseek = noop_llseek,
152}; 153};
153 154
154static __init int dccpprobe_init(void) 155static __init int dccpprobe_init(void)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 096250d1323b..ef343d53fcea 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -50,6 +50,30 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo);
50/* the maximum queue length for tx in packets. 0 is no limit */ 50/* the maximum queue length for tx in packets. 0 is no limit */
51int sysctl_dccp_tx_qlen __read_mostly = 5; 51int sysctl_dccp_tx_qlen __read_mostly = 5;
52 52
53#ifdef CONFIG_IP_DCCP_DEBUG
54static const char *dccp_state_name(const int state)
55{
56 static const char *const dccp_state_names[] = {
57 [DCCP_OPEN] = "OPEN",
58 [DCCP_REQUESTING] = "REQUESTING",
59 [DCCP_PARTOPEN] = "PARTOPEN",
60 [DCCP_LISTEN] = "LISTEN",
61 [DCCP_RESPOND] = "RESPOND",
62 [DCCP_CLOSING] = "CLOSING",
63 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
64 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
65 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
66 [DCCP_TIME_WAIT] = "TIME_WAIT",
67 [DCCP_CLOSED] = "CLOSED",
68 };
69
70 if (state >= DCCP_MAX_STATES)
71 return "INVALID STATE!";
72 else
73 return dccp_state_names[state];
74}
75#endif
76
53void dccp_set_state(struct sock *sk, const int state) 77void dccp_set_state(struct sock *sk, const int state)
54{ 78{
55 const int oldstate = sk->sk_state; 79 const int oldstate = sk->sk_state;
@@ -146,30 +170,6 @@ const char *dccp_packet_name(const int type)
146 170
147EXPORT_SYMBOL_GPL(dccp_packet_name); 171EXPORT_SYMBOL_GPL(dccp_packet_name);
148 172
149const char *dccp_state_name(const int state)
150{
151 static const char *const dccp_state_names[] = {
152 [DCCP_OPEN] = "OPEN",
153 [DCCP_REQUESTING] = "REQUESTING",
154 [DCCP_PARTOPEN] = "PARTOPEN",
155 [DCCP_LISTEN] = "LISTEN",
156 [DCCP_RESPOND] = "RESPOND",
157 [DCCP_CLOSING] = "CLOSING",
158 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
159 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
160 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
161 [DCCP_TIME_WAIT] = "TIME_WAIT",
162 [DCCP_CLOSED] = "CLOSED",
163 };
164
165 if (state >= DCCP_MAX_STATES)
166 return "INVALID STATE!";
167 else
168 return dccp_state_names[state];
169}
170
171EXPORT_SYMBOL_GPL(dccp_state_name);
172
173int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) 173int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174{ 174{
175 struct dccp_sock *dp = dccp_sk(sk); 175 struct dccp_sock *dp = dccp_sk(sk);
@@ -726,7 +726,13 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
726 goto out_discard; 726 goto out_discard;
727 727
728 skb_queue_tail(&sk->sk_write_queue, skb); 728 skb_queue_tail(&sk->sk_write_queue, skb);
729 dccp_write_xmit(sk,0); 729 /*
730 * The xmit_timer is set if the TX CCID is rate-based and will expire
731 * when congestion control permits to release further packets into the
732 * network. Window-based CCIDs do not use this timer.
733 */
734 if (!timer_pending(&dp->dccps_xmit_timer))
735 dccp_write_xmit(sk);
730out_release: 736out_release:
731 release_sock(sk); 737 release_sock(sk);
732 return rc ? : len; 738 return rc ? : len;
@@ -944,16 +950,29 @@ void dccp_close(struct sock *sk, long timeout)
944 950
945 if (data_was_unread) { 951 if (data_was_unread) {
946 /* Unread data was tossed, send an appropriate Reset Code */ 952 /* Unread data was tossed, send an appropriate Reset Code */
947 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread); 953 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
948 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); 954 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
949 dccp_set_state(sk, DCCP_CLOSED); 955 dccp_set_state(sk, DCCP_CLOSED);
950 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 956 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
951 /* Check zero linger _after_ checking for unread data. */ 957 /* Check zero linger _after_ checking for unread data. */
952 sk->sk_prot->disconnect(sk, 0); 958 sk->sk_prot->disconnect(sk, 0);
953 } else if (sk->sk_state != DCCP_CLOSED) { 959 } else if (sk->sk_state != DCCP_CLOSED) {
960 /*
961 * Normal connection termination. May need to wait if there are
962 * still packets in the TX queue that are delayed by the CCID.
963 */
964 dccp_flush_write_queue(sk, &timeout);
954 dccp_terminate_connection(sk); 965 dccp_terminate_connection(sk);
955 } 966 }
956 967
968 /*
969 * Flush write queue. This may be necessary in several cases:
970 * - we have been closed by the peer but still have application data;
971 * - abortive termination (unread data or zero linger time),
972 * - normal termination but queue could not be flushed within time limit
973 */
974 __skb_queue_purge(&sk->sk_write_queue);
975
957 sk_stream_wait_close(sk, timeout); 976 sk_stream_wait_close(sk, timeout);
958 977
959adjudge_to_death: 978adjudge_to_death:
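dccp_close() now spends part of its timeout draining the TX queue through dccp_flush_write_queue() before terminating, then unconditionally purges whatever remains. The subtle part is the budget bookkeeping: a wait that is woken early only charges the time actually slept. A small standalone model of that accounting, with jiffies reduced to an integer countdown:

	#include <stdio.h>

	/* Model of the *time_budget accounting in dccp_flush_write_queue():
	 * a wait of 'delay' units that returns with 'remaining' units unslept
	 * charges only the slept portion, delay - remaining. */
	static long charge_wait(long *budget, long delay, long remaining)
	{
		if (delay > *budget)
			return -1;	/* cannot afford this wait */
		*budget -= delay - remaining;
		return 0;
	}

	int main(void)
	{
		long budget = 100;

		charge_wait(&budget, 40, 0);	/* slept the full 40 */
		charge_wait(&budget, 30, 10);	/* woken early, slept only 20 */
		printf("budget left: %ld\n", budget);	/* 40 */
		return 0;
	}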
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1a9aa05d4dc4..7587870b7040 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -237,32 +237,35 @@ out:
237 sock_put(sk); 237 sock_put(sk);
238} 238}
239 239
240/* Transmit-delay timer: used by the CCIDs to delay actual send time */ 240/**
241static void dccp_write_xmit_timer(unsigned long data) 241 * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
242 * See the comments above %ccid_dequeueing_decision for supported modes.
243 */
244static void dccp_write_xmitlet(unsigned long data)
242{ 245{
243 struct sock *sk = (struct sock *)data; 246 struct sock *sk = (struct sock *)data;
244 struct dccp_sock *dp = dccp_sk(sk);
245 247
246 bh_lock_sock(sk); 248 bh_lock_sock(sk);
247 if (sock_owned_by_user(sk)) 249 if (sock_owned_by_user(sk))
248 sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); 250 sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
249 else 251 else
250 dccp_write_xmit(sk, 0); 252 dccp_write_xmit(sk);
251 bh_unlock_sock(sk); 253 bh_unlock_sock(sk);
252 sock_put(sk);
253} 254}
254 255
255static void dccp_init_write_xmit_timer(struct sock *sk) 256static void dccp_write_xmit_timer(unsigned long data)
256{ 257{
257 struct dccp_sock *dp = dccp_sk(sk); 258 dccp_write_xmitlet(data);
258 259 sock_put((struct sock *)data);
259 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
260 (unsigned long)sk);
261} 260}
262 261
263void dccp_init_xmit_timers(struct sock *sk) 262void dccp_init_xmit_timers(struct sock *sk)
264{ 263{
265 dccp_init_write_xmit_timer(sk); 264 struct dccp_sock *dp = dccp_sk(sk);
265
266 tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
267 setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
268 (unsigned long)sk);
266 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, 269 inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
267 &dccp_keepalive_timer); 270 &dccp_keepalive_timer);
268} 271}
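The callback split leaves the dequeueing work in dccp_write_xmitlet(), which the CCID can also schedule directly via the new dccps_xmitlet tasklet, while the timer wrapper merely adds the sock_put() balancing the reference held while the timer was armed. A sketch of the delegation pattern with stub locking; in this model only the timer path owns a reference to drop:

	#include <stdio.h>

	struct sock { int owned_by_user; int refs; };

	static void bh_lock_sock(struct sock *sk)       { (void)sk; }
	static void bh_unlock_sock(struct sock *sk)     { (void)sk; }
	static void retry_in_one_jiffy(struct sock *sk) { (void)sk; puts("retry"); }
	static void write_xmit(struct sock *sk)         { (void)sk; puts("xmit"); }
	static void sock_put(struct sock *sk)           { sk->refs--; }

	/* The tasklet body: callable from both tasklet and timer context. */
	static void write_xmitlet(unsigned long data)
	{
		struct sock *sk = (struct sock *)data;

		bh_lock_sock(sk);
		if (sk->owned_by_user)
			retry_in_one_jiffy(sk);	/* process context holds the lock */
		else
			write_xmit(sk);
		bh_unlock_sock(sk);
	}

	/* The timer wrapper only adds the sock_put() that balances the
	 * reference taken when the timer was armed. */
	static void write_xmit_timer(unsigned long data)
	{
		write_xmitlet(data);
		sock_put((struct sock *)data);
	}

	int main(void)
	{
		struct sock sk = { .owned_by_user = 0, .refs = 1 };

		write_xmit_timer((unsigned long)&sk);
		return sk.refs;
	}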
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790f..a76b78de679f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -155,7 +155,7 @@ static const struct proto_ops dn_proto_ops;
155static DEFINE_RWLOCK(dn_hash_lock); 155static DEFINE_RWLOCK(dn_hash_lock);
156static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; 156static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
157static struct hlist_head dn_wild_sk; 157static struct hlist_head dn_wild_sk;
158static atomic_t decnet_memory_allocated; 158static atomic_long_t decnet_memory_allocated;
159 159
160static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags); 160static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
161static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); 161static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 0363bb95cc7d..a085dbcf5c7f 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -48,7 +48,6 @@
48#include <net/dn_neigh.h> 48#include <net/dn_neigh.h>
49#include <net/dn_route.h> 49#include <net/dn_route.h>
50 50
51static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev);
52static int dn_neigh_construct(struct neighbour *); 51static int dn_neigh_construct(struct neighbour *);
53static void dn_long_error_report(struct neighbour *, struct sk_buff *); 52static void dn_long_error_report(struct neighbour *, struct sk_buff *);
54static void dn_short_error_report(struct neighbour *, struct sk_buff *); 53static void dn_short_error_report(struct neighbour *, struct sk_buff *);
@@ -93,6 +92,13 @@ static const struct neigh_ops dn_phase3_ops = {
93 .queue_xmit = dev_queue_xmit 92 .queue_xmit = dev_queue_xmit
94}; 93};
95 94
95static u32 dn_neigh_hash(const void *pkey,
96 const struct net_device *dev,
97 __u32 hash_rnd)
98{
99 return jhash_2words(*(__u16 *)pkey, 0, hash_rnd);
100}
101
96struct neigh_table dn_neigh_table = { 102struct neigh_table dn_neigh_table = {
97 .family = PF_DECnet, 103 .family = PF_DECnet,
98 .entry_size = sizeof(struct dn_neigh), 104 .entry_size = sizeof(struct dn_neigh),
@@ -122,11 +128,6 @@ struct neigh_table dn_neigh_table = {
122 .gc_thresh3 = 1024, 128 .gc_thresh3 = 1024,
123}; 129};
124 130
125static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev)
126{
127 return jhash_2words(*(__u16 *)pkey, 0, dn_neigh_table.hash_rnd);
128}
129
130static int dn_neigh_construct(struct neighbour *neigh) 131static int dn_neigh_construct(struct neighbour *neigh)
131{ 132{
132 struct net_device *dev = neigh->dev; 133 struct net_device *dev = neigh->dev;
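This file and arp.c further below adopt the same new hash contract: the neighbour core now passes the random seed into the per-table hash function instead of letting it read table->hash_rnd, which is what lets the core re-seed under its own locking. A sketch of the assumed signature, modeled on the ARP variant:

#include <linux/jhash.h>
#include <linux/netdevice.h>

static u32 example_neigh_hash(const void *pkey,
			      const struct net_device *dev,
			      __u32 hash_rnd)
{
	/* the seed comes from the caller, not from a global table field */
	return jhash_2words(*(const u32 *)pkey, dev->ifindex, hash_rnd);
}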
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index baeb1eaf011b..2ef115277bea 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -693,22 +693,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
693 aux = scp->accessdata.acc_userl; 693 aux = scp->accessdata.acc_userl;
694 *skb_put(skb, 1) = aux; 694 *skb_put(skb, 1) = aux;
695 if (aux > 0) 695 if (aux > 0)
696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux); 696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
697 697
698 aux = scp->accessdata.acc_passl; 698 aux = scp->accessdata.acc_passl;
699 *skb_put(skb, 1) = aux; 699 *skb_put(skb, 1) = aux;
700 if (aux > 0) 700 if (aux > 0)
701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux); 701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
702 702
703 aux = scp->accessdata.acc_accl; 703 aux = scp->accessdata.acc_accl;
704 *skb_put(skb, 1) = aux; 704 *skb_put(skb, 1) = aux;
705 if (aux > 0) 705 if (aux > 0)
706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); 706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
707 707
708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); 708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
709 *skb_put(skb, 1) = aux; 709 *skb_put(skb, 1) = aux;
710 if (aux > 0) 710 if (aux > 0)
711 memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); 711 memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
712 712
713 scp->persist = dn_nsp_persist(sk); 713 scp->persist = dn_nsp_persist(sk);
714 scp->persist_fxn = dn_nsp_retrans_conninit; 714 scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 6585ea6d1182..df0f3e54ff8a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -132,7 +132,6 @@ static struct dst_ops dn_dst_ops = {
132 .negative_advice = dn_dst_negative_advice, 132 .negative_advice = dn_dst_negative_advice,
133 .link_failure = dn_dst_link_failure, 133 .link_failure = dn_dst_link_failure,
134 .update_pmtu = dn_dst_update_pmtu, 134 .update_pmtu = dn_dst_update_pmtu,
135 .entries = ATOMIC_INIT(0),
136}; 135};
137 136
138static __inline__ unsigned dn_hash(__le16 src, __le16 dst) 137static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
@@ -1758,6 +1757,7 @@ void __init dn_route_init(void)
1758 dn_dst_ops.kmem_cachep = 1757 dn_dst_ops.kmem_cachep =
1759 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, 1758 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
1760 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1759 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1760 dst_entries_init(&dn_dst_ops);
1761 setup_timer(&dn_route_timer, dn_dst_check_expire, 0); 1761 setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
1762 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1762 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
1763 add_timer(&dn_route_timer); 1763 add_timer(&dn_route_timer);
@@ -1816,5 +1816,6 @@ void __exit dn_route_cleanup(void)
1816 dn_run_flush(0); 1816 dn_run_flush(0);
1817 1817
1818 proc_net_remove(&init_net, "decnet_cache"); 1818 proc_net_remove(&init_net, "decnet_cache");
1819 dst_entries_destroy(&dn_dst_ops);
1819} 1820}
1820 1821
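dst_entries_init()/dst_entries_destroy() replace the static ATOMIC_INIT(0) entries field; the accounting presumably becomes a per-cpu counter that needs runtime setup and teardown. A hedged sketch of the pairing, assuming dst_entries_init() can fail (the hunk above ignores its return value):

#include <linux/init.h>
#include <net/dst.h>

static struct dst_ops example_dst_ops;		/* stands in for dn_dst_ops */

static int __init example_route_init(void)
{
	int err;

	/* allocate the per-cpu entry counter before any dst can be created */
	err = dst_entries_init(&example_dst_ops);
	if (err)
		return err;
	/* ... timers, caches, proc entries ... */
	return 0;
}

static void __exit example_route_cleanup(void)
{
	/* ... */
	dst_entries_destroy(&example_dst_ops);	/* free the per-cpu counter */
}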
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index be3eb8e23288..28f8b5e5f73b 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -38,7 +38,7 @@ int decnet_log_martians = 1;
38int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW; 38int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
39 39
40/* Reasonable defaults, I hope, based on tcp's defaults */ 40/* Reasonable defaults, I hope, based on tcp's defaults */
41int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 }; 41long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
42int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 }; 42int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
43int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 }; 43int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
44 44
@@ -324,7 +324,7 @@ static ctl_table dn_table[] = {
324 .data = &sysctl_decnet_mem, 324 .data = &sysctl_decnet_mem,
325 .maxlen = sizeof(sysctl_decnet_mem), 325 .maxlen = sizeof(sysctl_decnet_mem),
326 .mode = 0644, 326 .mode = 0644,
327 .proc_handler = proc_dointvec, 327 .proc_handler = proc_doulongvec_minmax
328 }, 328 },
329 { 329 {
330 .procname = "decnet_rmem", 330 .procname = "decnet_rmem",
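Changing the array to long without also switching the handler would be a bug: proc_dointvec reads and writes 4-byte ints, which on 64-bit would touch only half of each 8-byte slot. A sketch of the matching pair, with illustrative names:

#include <linux/sysctl.h>

static long sysctl_example_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };

static ctl_table example_table[] = {
	{
		.procname	= "example_mem",
		.data		= &sysctl_example_mem,
		.maxlen		= sizeof(sysctl_example_mem),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,	/* long-aware */
	},
	{ }
};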
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dc54bd0d083b..f8c1ae4b41f0 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -392,7 +392,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
392 dev_queue_xmit(skb); 392 dev_queue_xmit(skb);
393 dev_put(dev); 393 dev_put(dev);
394 mutex_unlock(&econet_mutex); 394 mutex_unlock(&econet_mutex);
395 return(len); 395 return len;
396 396
397 out_free: 397 out_free:
398 kfree_skb(skb); 398 kfree_skb(skb);
@@ -637,7 +637,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol,
637 eo->num = protocol; 637 eo->num = protocol;
638 638
639 econet_insert_socket(&econet_sklist, sk); 639 econet_insert_socket(&econet_sklist, sk);
640 return(0); 640 return 0;
641out: 641out:
642 return err; 642 return err;
643} 643}
@@ -1009,7 +1009,6 @@ static int __init aun_udp_initialise(void)
1009 struct sockaddr_in sin; 1009 struct sockaddr_in sin;
1010 1010
1011 skb_queue_head_init(&aun_queue); 1011 skb_queue_head_init(&aun_queue);
1012 spin_lock_init(&aun_queue_lock);
1013 setup_timer(&ab_cleanup_timer, ab_cleanup, 0); 1012 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
1014 ab_cleanup_timer.expires = jiffies + (HZ*2); 1013 ab_cleanup_timer.expires = jiffies + (HZ*2);
1015 add_timer(&ab_cleanup_timer); 1014 add_timer(&ab_cleanup_timer);
@@ -1167,7 +1166,6 @@ static int __init econet_proto_init(void)
1167 goto out; 1166 goto out;
1168 sock_register(&econet_family_ops); 1167 sock_register(&econet_family_ops);
1169#ifdef CONFIG_ECONET_AUNUDP 1168#ifdef CONFIG_ECONET_AUNUDP
1170 spin_lock_init(&aun_queue_lock);
1171 aun_udp_initialise(); 1169 aun_udp_initialise();
1172#endif 1170#endif
1173#ifdef CONFIG_ECONET_NATIVE 1171#ifdef CONFIG_ECONET_NATIVE
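Both spin_lock_init() calls on aun_queue_lock can only be dropped if the lock is initialized where it is defined; presumably the definition is (or becomes) a static DEFINE_SPINLOCK, roughly:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(aun_queue_lock);	/* initialized at compile time,
					 * so no runtime init is needed */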
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 215c83986a9d..f00ef2f1d814 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -367,7 +367,7 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
367EXPORT_SYMBOL(alloc_etherdev_mq); 367EXPORT_SYMBOL(alloc_etherdev_mq);
368 368
369static size_t _format_mac_addr(char *buf, int buflen, 369static size_t _format_mac_addr(char *buf, int buflen,
370 const unsigned char *addr, int len) 370 const unsigned char *addr, int len)
371{ 371{
372 int i; 372 int i;
373 char *cp = buf; 373 char *cp = buf;
@@ -376,7 +376,7 @@ static size_t _format_mac_addr(char *buf, int buflen,
376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); 376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
377 if (i == len - 1) 377 if (i == len - 1)
378 break; 378 break;
379 cp += strlcpy(cp, ":", buflen - (cp - buf)); 379 cp += scnprintf(cp, buflen - (cp - buf), ":");
380 } 380 }
381 return cp - buf; 381 return cp - buf;
382} 382}
@@ -386,7 +386,7 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
386 size_t l; 386 size_t l;
387 387
388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len); 388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
389 l += strlcpy(buf + l, "\n", PAGE_SIZE - l); 389 l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
390 return ((ssize_t) l); 390 return (ssize_t)l;
391} 391}
392EXPORT_SYMBOL(sysfs_format_mac); 392EXPORT_SYMBOL(sysfs_format_mac);
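The strlcpy-to-scnprintf conversion is about return-value semantics, not formatting: strlcpy() returns the length of the source string even when it truncates, so using its result to advance a cursor can walk past the buffer, while scnprintf() returns the number of bytes actually stored. In miniature (hypothetical demo):

#include <linux/kernel.h>
#include <linux/string.h>

static void demo(void)
{
	char buf[4];
	size_t n;

	n = strlcpy(buf, "abcdef", sizeof(buf));   /* n == 6: strlen(src)  */
	n = scnprintf(buf, sizeof(buf), "abcdef"); /* n == 3: bytes stored */
	(void)n;
}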
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7cd7760144f7..9e95d7fb6d5a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -84,7 +84,7 @@ config IP_FIB_TRIE
84 84
85 An experimental study of compression methods for dynamic tries 85 An experimental study of compression methods for dynamic tries
86 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 86 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
87 http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 87 <http://www.csc.kth.se/~snilsson/software/dyntrie2/>
88 88
89endchoice 89endchoice
90 90
@@ -215,9 +215,15 @@ config NET_IPIP
215 be inserted in and removed from the running kernel whenever you 215 be inserted in and removed from the running kernel whenever you
216 want). Most people won't need this and can say N. 216 want). Most people won't need this and can say N.
217 217
218config NET_IPGRE_DEMUX
219 tristate "IP: GRE demultiplexer"
220 help
221 This is helper module to demultiplex GRE packets on GRE version field criteria.
221 This is a helper module that demultiplexes GRE packets based on the GRE version field.
222 Required by ip_gre and pptp modules.
223
218config NET_IPGRE 224config NET_IPGRE
219 tristate "IP: GRE tunnels over IP" 225 tristate "IP: GRE tunnels over IP"
220 depends on IPV6 || IPV6=n 226 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
221 help 227 help
222 Tunneling means encapsulating data of one protocol type within 228 Tunneling means encapsulating data of one protocol type within
223 another protocol and sending it over a channel that understands the 229 another protocol and sending it over a channel that understands the
@@ -556,7 +562,7 @@ config TCP_CONG_VENO
556 distinguishing to circumvent the difficult judgment of the packet loss 562 distinguishing to circumvent the difficult judgment of the packet loss
557 type. TCP Veno cuts down less congestion window in response to random 563 type. TCP Veno cuts down less congestion window in response to random
558 loss packets. 564 loss packets.
559 See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf 565 See <http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1177186>
560 566
561config TCP_CONG_YEAH 567config TCP_CONG_YEAH
562 tristate "YeAH TCP" 568 tristate "YeAH TCP"
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 80ff87ce43aa..4978d22f9a75 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
21obj-$(CONFIG_IP_MROUTE) += ipmr.o 21obj-$(CONFIG_IP_MROUTE) += ipmr.o
22obj-$(CONFIG_NET_IPIP) += ipip.o 22obj-$(CONFIG_NET_IPIP) += ipip.o
23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
23obj-$(CONFIG_NET_IPGRE) += ip_gre.o 24obj-$(CONFIG_NET_IPGRE) += ip_gre.o
24obj-$(CONFIG_SYN_COOKIES) += syncookies.o 25obj-$(CONFIG_SYN_COOKIES) += syncookies.o
25obj-$(CONFIG_INET_AH) += ah4.o 26obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6a1100c25a9f..f581f77d1097 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -227,18 +227,16 @@ EXPORT_SYMBOL(inet_ehash_secret);
227 227
228/* 228/*
229 * inet_ehash_secret must be set exactly once 229 * inet_ehash_secret must be set exactly once
230 * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
231 */ 230 */
232void build_ehash_secret(void) 231void build_ehash_secret(void)
233{ 232{
234 u32 rnd; 233 u32 rnd;
234
235 do { 235 do {
236 get_random_bytes(&rnd, sizeof(rnd)); 236 get_random_bytes(&rnd, sizeof(rnd));
237 } while (rnd == 0); 237 } while (rnd == 0);
238 spin_lock_bh(&inetsw_lock); 238
239 if (!inet_ehash_secret) 239 cmpxchg(&inet_ehash_secret, 0, rnd);
240 inet_ehash_secret = rnd;
241 spin_unlock_bh(&inetsw_lock);
242} 240}
243EXPORT_SYMBOL(build_ehash_secret); 241EXPORT_SYMBOL(build_ehash_secret);
244 242
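The lockless once-only initialization now used above, in isolation: cmpxchg() installs the random value only if the slot is still zero, so concurrent first callers race harmlessly and every later reader sees the same secret, with no spinlock needed. A self-contained sketch with illustrative names:

#include <linux/random.h>
#include <asm/system.h>

static u32 example_secret;

static void build_example_secret(void)
{
	u32 rnd;

	do {
		get_random_bytes(&rnd, sizeof(rnd));
	} while (rnd == 0);			/* zero means "not yet set" */

	cmpxchg(&example_secret, 0, rnd);	/* only the first writer wins */
}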
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96c1955b3e2f..d8e540c5b071 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -55,7 +55,7 @@
55 * Stuart Cheshire : Metricom and grat arp fixes 55 * Stuart Cheshire : Metricom and grat arp fixes
56 * *** FOR 2.1 clean this up *** 56 * *** FOR 2.1 clean this up ***
57 * Lawrence V. Stefani: (08/12/96) Added FDDI support. 57 * Lawrence V. Stefani: (08/12/96) Added FDDI support.
58 * Alan Cox : Took the AP1000 nasty FDDI hack and 58 * Alan Cox : Took the AP1000 nasty FDDI hack and
59 * folded into the mainstream FDDI code. 59 * folded into the mainstream FDDI code.
60 * Ack spit, Linus how did you allow that 60 * Ack spit, Linus how did you allow that
61 * one in... 61 * one in...
@@ -120,14 +120,14 @@ EXPORT_SYMBOL(clip_tbl_hook);
120#endif 120#endif
121 121
122#include <asm/system.h> 122#include <asm/system.h>
123#include <asm/uaccess.h> 123#include <linux/uaccess.h>
124 124
125#include <linux/netfilter_arp.h> 125#include <linux/netfilter_arp.h>
126 126
127/* 127/*
128 * Interface to generic neighbour cache. 128 * Interface to generic neighbour cache.
129 */ 129 */
130static u32 arp_hash(const void *pkey, const struct net_device *dev); 130static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd);
131static int arp_constructor(struct neighbour *neigh); 131static int arp_constructor(struct neighbour *neigh);
132static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); 132static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
133static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); 133static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -161,7 +161,7 @@ static const struct neigh_ops arp_direct_ops = {
161 .queue_xmit = dev_queue_xmit, 161 .queue_xmit = dev_queue_xmit,
162}; 162};
163 163
164const struct neigh_ops arp_broken_ops = { 164static const struct neigh_ops arp_broken_ops = {
165 .family = AF_INET, 165 .family = AF_INET,
166 .solicit = arp_solicit, 166 .solicit = arp_solicit,
167 .error_report = arp_error_report, 167 .error_report = arp_error_report,
@@ -170,35 +170,34 @@ const struct neigh_ops arp_broken_ops = {
170 .hh_output = dev_queue_xmit, 170 .hh_output = dev_queue_xmit,
171 .queue_xmit = dev_queue_xmit, 171 .queue_xmit = dev_queue_xmit,
172}; 172};
173EXPORT_SYMBOL(arp_broken_ops);
174 173
175struct neigh_table arp_tbl = { 174struct neigh_table arp_tbl = {
176 .family = AF_INET, 175 .family = AF_INET,
177 .entry_size = sizeof(struct neighbour) + 4, 176 .entry_size = sizeof(struct neighbour) + 4,
178 .key_len = 4, 177 .key_len = 4,
179 .hash = arp_hash, 178 .hash = arp_hash,
180 .constructor = arp_constructor, 179 .constructor = arp_constructor,
181 .proxy_redo = parp_redo, 180 .proxy_redo = parp_redo,
182 .id = "arp_cache", 181 .id = "arp_cache",
183 .parms = { 182 .parms = {
184 .tbl = &arp_tbl, 183 .tbl = &arp_tbl,
185 .base_reachable_time = 30 * HZ, 184 .base_reachable_time = 30 * HZ,
186 .retrans_time = 1 * HZ, 185 .retrans_time = 1 * HZ,
187 .gc_staletime = 60 * HZ, 186 .gc_staletime = 60 * HZ,
188 .reachable_time = 30 * HZ, 187 .reachable_time = 30 * HZ,
189 .delay_probe_time = 5 * HZ, 188 .delay_probe_time = 5 * HZ,
190 .queue_len = 3, 189 .queue_len = 3,
191 .ucast_probes = 3, 190 .ucast_probes = 3,
192 .mcast_probes = 3, 191 .mcast_probes = 3,
193 .anycast_delay = 1 * HZ, 192 .anycast_delay = 1 * HZ,
194 .proxy_delay = (8 * HZ) / 10, 193 .proxy_delay = (8 * HZ) / 10,
195 .proxy_qlen = 64, 194 .proxy_qlen = 64,
196 .locktime = 1 * HZ, 195 .locktime = 1 * HZ,
197 }, 196 },
198 .gc_interval = 30 * HZ, 197 .gc_interval = 30 * HZ,
199 .gc_thresh1 = 128, 198 .gc_thresh1 = 128,
200 .gc_thresh2 = 512, 199 .gc_thresh2 = 512,
201 .gc_thresh3 = 1024, 200 .gc_thresh3 = 1024,
202}; 201};
203EXPORT_SYMBOL(arp_tbl); 202EXPORT_SYMBOL(arp_tbl);
204 203
@@ -226,14 +225,16 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
226} 225}
227 226
228 227
229static u32 arp_hash(const void *pkey, const struct net_device *dev) 228static u32 arp_hash(const void *pkey,
229 const struct net_device *dev,
230 __u32 hash_rnd)
230{ 231{
231 return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd); 232 return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd);
232} 233}
233 234
234static int arp_constructor(struct neighbour *neigh) 235static int arp_constructor(struct neighbour *neigh)
235{ 236{
236 __be32 addr = *(__be32*)neigh->primary_key; 237 __be32 addr = *(__be32 *)neigh->primary_key;
237 struct net_device *dev = neigh->dev; 238 struct net_device *dev = neigh->dev;
238 struct in_device *in_dev; 239 struct in_device *in_dev;
239 struct neigh_parms *parms; 240 struct neigh_parms *parms;
@@ -296,16 +297,19 @@ static int arp_constructor(struct neighbour *neigh)
296 neigh->ops = &arp_broken_ops; 297 neigh->ops = &arp_broken_ops;
297 neigh->output = neigh->ops->output; 298 neigh->output = neigh->ops->output;
298 return 0; 299 return 0;
300#else
301 break;
299#endif 302#endif
300 ;} 303 }
301#endif 304#endif
302 if (neigh->type == RTN_MULTICAST) { 305 if (neigh->type == RTN_MULTICAST) {
303 neigh->nud_state = NUD_NOARP; 306 neigh->nud_state = NUD_NOARP;
304 arp_mc_map(addr, neigh->ha, dev, 1); 307 arp_mc_map(addr, neigh->ha, dev, 1);
305 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { 308 } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
306 neigh->nud_state = NUD_NOARP; 309 neigh->nud_state = NUD_NOARP;
307 memcpy(neigh->ha, dev->dev_addr, dev->addr_len); 310 memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
308 } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) { 311 } else if (neigh->type == RTN_BROADCAST ||
312 (dev->flags & IFF_POINTOPOINT)) {
309 neigh->nud_state = NUD_NOARP; 313 neigh->nud_state = NUD_NOARP;
310 memcpy(neigh->ha, dev->broadcast, dev->addr_len); 314 memcpy(neigh->ha, dev->broadcast, dev->addr_len);
311 } 315 }
@@ -315,7 +319,7 @@ static int arp_constructor(struct neighbour *neigh)
315 else 319 else
316 neigh->ops = &arp_generic_ops; 320 neigh->ops = &arp_generic_ops;
317 321
318 if (neigh->nud_state&NUD_VALID) 322 if (neigh->nud_state & NUD_VALID)
319 neigh->output = neigh->ops->connected_output; 323 neigh->output = neigh->ops->connected_output;
320 else 324 else
321 neigh->output = neigh->ops->output; 325 neigh->output = neigh->ops->output;
@@ -334,7 +338,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
334 __be32 saddr = 0; 338 __be32 saddr = 0;
335 u8 *dst_ha = NULL; 339 u8 *dst_ha = NULL;
336 struct net_device *dev = neigh->dev; 340 struct net_device *dev = neigh->dev;
337 __be32 target = *(__be32*)neigh->primary_key; 341 __be32 target = *(__be32 *)neigh->primary_key;
338 int probes = atomic_read(&neigh->probes); 342 int probes = atomic_read(&neigh->probes);
339 struct in_device *in_dev; 343 struct in_device *in_dev;
340 344
@@ -347,7 +351,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 351 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
348 default: 352 default:
349 case 0: /* By default announce any local IP */ 353 case 0: /* By default announce any local IP */
350 if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) 354 if (skb && inet_addr_type(dev_net(dev),
355 ip_hdr(skb)->saddr) == RTN_LOCAL)
351 saddr = ip_hdr(skb)->saddr; 356 saddr = ip_hdr(skb)->saddr;
352 break; 357 break;
353 case 1: /* Restrict announcements of saddr in same subnet */ 358 case 1: /* Restrict announcements of saddr in same subnet */
@@ -369,16 +374,21 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
369 if (!saddr) 374 if (!saddr)
370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 375 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
371 376
372 if ((probes -= neigh->parms->ucast_probes) < 0) { 377 probes -= neigh->parms->ucast_probes;
373 if (!(neigh->nud_state&NUD_VALID)) 378 if (probes < 0) {
374 printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); 379 if (!(neigh->nud_state & NUD_VALID))
380 printk(KERN_DEBUG
381 "trying to ucast probe in NUD_INVALID\n");
375 dst_ha = neigh->ha; 382 dst_ha = neigh->ha;
376 read_lock_bh(&neigh->lock); 383 read_lock_bh(&neigh->lock);
377 } else if ((probes -= neigh->parms->app_probes) < 0) { 384 } else {
385 probes -= neigh->parms->app_probes;
386 if (probes < 0) {
378#ifdef CONFIG_ARPD 387#ifdef CONFIG_ARPD
379 neigh_app_ns(neigh); 388 neigh_app_ns(neigh);
380#endif 389#endif
381 return; 390 return;
391 }
382 } 392 }
383 393
384 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, 394 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
@@ -451,7 +461,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
451 * is allowed to use this function, it is scheduled to be removed. --ANK 461 * is allowed to use this function, it is scheduled to be removed. --ANK
452 */ 462 */
453 463
454static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) 464static int arp_set_predefined(int addr_hint, unsigned char *haddr,
465 __be32 paddr, struct net_device *dev)
455{ 466{
456 switch (addr_hint) { 467 switch (addr_hint) {
457 case RTN_LOCAL: 468 case RTN_LOCAL:
@@ -483,17 +494,16 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
483 494
484 paddr = skb_rtable(skb)->rt_gateway; 495 paddr = skb_rtable(skb)->rt_gateway;
485 496
486 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) 497 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
498 paddr, dev))
487 return 0; 499 return 0;
488 500
489 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); 501 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
490 502
491 if (n) { 503 if (n) {
492 n->used = jiffies; 504 n->used = jiffies;
493 if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { 505 if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
494 read_lock_bh(&n->lock); 506 neigh_ha_snapshot(haddr, n, dev);
495 memcpy(haddr, n->ha, dev->addr_len);
496 read_unlock_bh(&n->lock);
497 neigh_release(n); 507 neigh_release(n);
498 return 0; 508 return 0;
499 } 509 }
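neigh_ha_snapshot() folds the read_lock_bh()/memcpy()/read_unlock_bh() sequence into one helper. Its definition is not shown in this hunk; a plausible shape, assuming the series protects neigh->ha[] with a seqlock named ha_lock, would be:

#include <linux/string.h>
#include <net/neighbour.h>

static inline void example_ha_snapshot(char *dst, const struct neighbour *n,
				       const struct net_device *dev)
{
	unsigned int seq;

	do {
		seq = read_seqbegin(&n->ha_lock);	/* assumed field */
		memcpy(dst, n->ha, dev->addr_len);
	} while (read_seqretry(&n->ha_lock, seq));
}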
@@ -515,13 +525,14 @@ int arp_bind_neighbour(struct dst_entry *dst)
515 return -EINVAL; 525 return -EINVAL;
516 if (n == NULL) { 526 if (n == NULL) {
517 __be32 nexthop = ((struct rtable *)dst)->rt_gateway; 527 __be32 nexthop = ((struct rtable *)dst)->rt_gateway;
518 if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) 528 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
519 nexthop = 0; 529 nexthop = 0;
520 n = __neigh_lookup_errno( 530 n = __neigh_lookup_errno(
521#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 531#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
522 dev->type == ARPHRD_ATM ? clip_tbl_hook : 532 dev->type == ARPHRD_ATM ?
533 clip_tbl_hook :
523#endif 534#endif
524 &arp_tbl, &nexthop, dev); 535 &arp_tbl, &nexthop, dev);
525 if (IS_ERR(n)) 536 if (IS_ERR(n))
526 return PTR_ERR(n); 537 return PTR_ERR(n);
527 dst->neighbour = n; 538 dst->neighbour = n;
@@ -543,8 +554,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
543 554
544 if (!IN_DEV_PROXY_ARP(in_dev)) 555 if (!IN_DEV_PROXY_ARP(in_dev))
545 return 0; 556 return 0;
546 557 imi = IN_DEV_MEDIUM_ID(in_dev);
547 if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0) 558 if (imi == 0)
548 return 1; 559 return 1;
549 if (imi == -1) 560 if (imi == -1)
550 return 0; 561 return 0;
@@ -555,7 +566,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
555 if (out_dev) 566 if (out_dev)
556 omi = IN_DEV_MEDIUM_ID(out_dev); 567 omi = IN_DEV_MEDIUM_ID(out_dev);
557 568
558 return (omi != imi && omi != -1); 569 return omi != imi && omi != -1;
559} 570}
560 571
561/* 572/*
@@ -685,7 +696,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
685 arp->ar_pln = 4; 696 arp->ar_pln = 4;
686 arp->ar_op = htons(type); 697 arp->ar_op = htons(type);
687 698
688 arp_ptr=(unsigned char *)(arp+1); 699 arp_ptr = (unsigned char *)(arp + 1);
689 700
690 memcpy(arp_ptr, src_hw, dev->addr_len); 701 memcpy(arp_ptr, src_hw, dev->addr_len);
691 arp_ptr += dev->addr_len; 702 arp_ptr += dev->addr_len;
@@ -735,9 +746,8 @@ void arp_send(int type, int ptype, __be32 dest_ip,
735 746
736 skb = arp_create(type, ptype, dest_ip, dev, src_ip, 747 skb = arp_create(type, ptype, dest_ip, dev, src_ip,
737 dest_hw, src_hw, target_hw); 748 dest_hw, src_hw, target_hw);
738 if (skb == NULL) { 749 if (skb == NULL)
739 return; 750 return;
740 }
741 751
742 arp_xmit(skb); 752 arp_xmit(skb);
743} 753}
@@ -815,7 +825,7 @@ static int arp_process(struct sk_buff *skb)
815/* 825/*
816 * Extract fields 826 * Extract fields
817 */ 827 */
818 arp_ptr= (unsigned char *)(arp+1); 828 arp_ptr = (unsigned char *)(arp + 1);
819 sha = arp_ptr; 829 sha = arp_ptr;
820 arp_ptr += dev->addr_len; 830 arp_ptr += dev->addr_len;
821 memcpy(&sip, arp_ptr, 4); 831 memcpy(&sip, arp_ptr, 4);
@@ -869,16 +879,17 @@ static int arp_process(struct sk_buff *skb)
869 addr_type = rt->rt_type; 879 addr_type = rt->rt_type;
870 880
871 if (addr_type == RTN_LOCAL) { 881 if (addr_type == RTN_LOCAL) {
872 int dont_send = 0; 882 int dont_send;
873 883
874 if (!dont_send) 884 dont_send = arp_ignore(in_dev, sip, tip);
875 dont_send |= arp_ignore(in_dev,sip,tip);
876 if (!dont_send && IN_DEV_ARPFILTER(in_dev)) 885 if (!dont_send && IN_DEV_ARPFILTER(in_dev))
877 dont_send |= arp_filter(sip,tip,dev); 886 dont_send |= arp_filter(sip, tip, dev);
878 if (!dont_send) { 887 if (!dont_send) {
879 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 888 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
880 if (n) { 889 if (n) {
881 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 890 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
891 dev, tip, sha, dev->dev_addr,
892 sha);
882 neigh_release(n); 893 neigh_release(n);
883 } 894 }
884 } 895 }
@@ -887,8 +898,7 @@ static int arp_process(struct sk_buff *skb)
887 if (addr_type == RTN_UNICAST && 898 if (addr_type == RTN_UNICAST &&
888 (arp_fwd_proxy(in_dev, dev, rt) || 899 (arp_fwd_proxy(in_dev, dev, rt) ||
889 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || 900 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
890 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) 901 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
891 {
892 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 902 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
893 if (n) 903 if (n)
894 neigh_release(n); 904 neigh_release(n);
@@ -896,9 +906,12 @@ static int arp_process(struct sk_buff *skb)
896 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || 906 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
897 skb->pkt_type == PACKET_HOST || 907 skb->pkt_type == PACKET_HOST ||
898 in_dev->arp_parms->proxy_delay == 0) { 908 in_dev->arp_parms->proxy_delay == 0) {
899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 909 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
910 dev, tip, sha, dev->dev_addr,
911 sha);
900 } else { 912 } else {
901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 913 pneigh_enqueue(&arp_tbl,
914 in_dev->arp_parms, skb);
902 return 0; 915 return 0;
903 } 916 }
904 goto out; 917 goto out;
@@ -939,7 +952,8 @@ static int arp_process(struct sk_buff *skb)
939 if (arp->ar_op != htons(ARPOP_REPLY) || 952 if (arp->ar_op != htons(ARPOP_REPLY) ||
940 skb->pkt_type != PACKET_HOST) 953 skb->pkt_type != PACKET_HOST)
941 state = NUD_STALE; 954 state = NUD_STALE;
942 neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0); 955 neigh_update(n, sha, state,
956 override ? NEIGH_UPDATE_F_OVERRIDE : 0);
943 neigh_release(n); 957 neigh_release(n);
944 } 958 }
945 959
@@ -975,7 +989,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
975 arp->ar_pln != 4) 989 arp->ar_pln != 4)
976 goto freeskb; 990 goto freeskb;
977 991
978 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 992 skb = skb_share_check(skb, GFP_ATOMIC);
993 if (skb == NULL)
979 goto out_of_mem; 994 goto out_of_mem;
980 995
981 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); 996 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
@@ -1019,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1019 return -EINVAL; 1034 return -EINVAL;
1020 if (!dev && (r->arp_flags & ATF_COM)) { 1035 if (!dev && (r->arp_flags & ATF_COM)) {
1021 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, 1036 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
1022 r->arp_ha.sa_data); 1037 r->arp_ha.sa_data);
1023 if (!dev) 1038 if (!dev)
1024 return -ENODEV; 1039 return -ENODEV;
1025 } 1040 }
@@ -1033,7 +1048,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1033} 1048}
1034 1049
1035static int arp_req_set(struct net *net, struct arpreq *r, 1050static int arp_req_set(struct net *net, struct arpreq *r,
1036 struct net_device * dev) 1051 struct net_device *dev)
1037{ 1052{
1038 __be32 ip; 1053 __be32 ip;
1039 struct neighbour *neigh; 1054 struct neighbour *neigh;
@@ -1046,10 +1061,11 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1046 if (r->arp_flags & ATF_PERM) 1061 if (r->arp_flags & ATF_PERM)
1047 r->arp_flags |= ATF_COM; 1062 r->arp_flags |= ATF_COM;
1048 if (dev == NULL) { 1063 if (dev == NULL) {
1049 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1064 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1050 .tos = RTO_ONLINK } } }; 1065 .tos = RTO_ONLINK } };
1051 struct rtable * rt; 1066 struct rtable *rt;
1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1067 err = ip_route_output_key(net, &rt, &fl);
1068 if (err != 0)
1053 return err; 1069 return err;
1054 dev = rt->dst.dev; 1070 dev = rt->dst.dev;
1055 ip_rt_put(rt); 1071 ip_rt_put(rt);
@@ -1083,9 +1099,9 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1083 unsigned state = NUD_STALE; 1099 unsigned state = NUD_STALE;
1084 if (r->arp_flags & ATF_PERM) 1100 if (r->arp_flags & ATF_PERM)
1085 state = NUD_PERMANENT; 1101 state = NUD_PERMANENT;
1086 err = neigh_update(neigh, (r->arp_flags&ATF_COM) ? 1102 err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
1087 r->arp_ha.sa_data : NULL, state, 1103 r->arp_ha.sa_data : NULL, state,
1088 NEIGH_UPDATE_F_OVERRIDE| 1104 NEIGH_UPDATE_F_OVERRIDE |
1089 NEIGH_UPDATE_F_ADMIN); 1105 NEIGH_UPDATE_F_ADMIN);
1090 neigh_release(neigh); 1106 neigh_release(neigh);
1091 } 1107 }
@@ -1094,12 +1110,12 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1094 1110
1095static unsigned arp_state_to_flags(struct neighbour *neigh) 1111static unsigned arp_state_to_flags(struct neighbour *neigh)
1096{ 1112{
1097 unsigned flags = 0;
1098 if (neigh->nud_state&NUD_PERMANENT) 1113 if (neigh->nud_state&NUD_PERMANENT)
1099 flags = ATF_PERM|ATF_COM; 1114 return ATF_PERM | ATF_COM;
1100 else if (neigh->nud_state&NUD_VALID) 1115 else if (neigh->nud_state&NUD_VALID)
1101 flags = ATF_COM; 1116 return ATF_COM;
1102 return flags; 1117 else
1118 return 0;
1103} 1119}
1104 1120
1105/* 1121/*
@@ -1142,7 +1158,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
1142} 1158}
1143 1159
1144static int arp_req_delete(struct net *net, struct arpreq *r, 1160static int arp_req_delete(struct net *net, struct arpreq *r,
1145 struct net_device * dev) 1161 struct net_device *dev)
1146{ 1162{
1147 int err; 1163 int err;
1148 __be32 ip; 1164 __be32 ip;
@@ -1153,10 +1169,11 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1153 1169
1154 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1170 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1155 if (dev == NULL) { 1171 if (dev == NULL) {
1156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1172 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1157 .tos = RTO_ONLINK } } }; 1173 .tos = RTO_ONLINK } };
1158 struct rtable * rt; 1174 struct rtable *rt;
1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1175 err = ip_route_output_key(net, &rt, &fl);
1176 if (err != 0)
1160 return err; 1177 return err;
1161 dev = rt->dst.dev; 1178 dev = rt->dst.dev;
1162 ip_rt_put(rt); 1179 ip_rt_put(rt);
@@ -1166,7 +1183,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1166 err = -ENXIO; 1183 err = -ENXIO;
1167 neigh = neigh_lookup(&arp_tbl, &ip, dev); 1184 neigh = neigh_lookup(&arp_tbl, &ip, dev);
1168 if (neigh) { 1185 if (neigh) {
1169 if (neigh->nud_state&~NUD_NOARP) 1186 if (neigh->nud_state & ~NUD_NOARP)
1170 err = neigh_update(neigh, NULL, NUD_FAILED, 1187 err = neigh_update(neigh, NULL, NUD_FAILED,
1171 NEIGH_UPDATE_F_OVERRIDE| 1188 NEIGH_UPDATE_F_OVERRIDE|
1172 NEIGH_UPDATE_F_ADMIN); 1189 NEIGH_UPDATE_F_ADMIN);
@@ -1186,24 +1203,24 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1186 struct net_device *dev = NULL; 1203 struct net_device *dev = NULL;
1187 1204
1188 switch (cmd) { 1205 switch (cmd) {
1189 case SIOCDARP: 1206 case SIOCDARP:
1190 case SIOCSARP: 1207 case SIOCSARP:
1191 if (!capable(CAP_NET_ADMIN)) 1208 if (!capable(CAP_NET_ADMIN))
1192 return -EPERM; 1209 return -EPERM;
1193 case SIOCGARP: 1210 case SIOCGARP:
1194 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1211 err = copy_from_user(&r, arg, sizeof(struct arpreq));
1195 if (err) 1212 if (err)
1196 return -EFAULT; 1213 return -EFAULT;
1197 break; 1214 break;
1198 default: 1215 default:
1199 return -EINVAL; 1216 return -EINVAL;
1200 } 1217 }
1201 1218
1202 if (r.arp_pa.sa_family != AF_INET) 1219 if (r.arp_pa.sa_family != AF_INET)
1203 return -EPFNOSUPPORT; 1220 return -EPFNOSUPPORT;
1204 1221
1205 if (!(r.arp_flags & ATF_PUBL) && 1222 if (!(r.arp_flags & ATF_PUBL) &&
1206 (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB))) 1223 (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
1207 return -EINVAL; 1224 return -EINVAL;
1208 if (!(r.arp_flags & ATF_NETMASK)) 1225 if (!(r.arp_flags & ATF_NETMASK))
1209 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = 1226 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
@@ -1211,7 +1228,8 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1211 rtnl_lock(); 1228 rtnl_lock();
1212 if (r.arp_dev[0]) { 1229 if (r.arp_dev[0]) {
1213 err = -ENODEV; 1230 err = -ENODEV;
1214 if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) 1231 dev = __dev_get_by_name(net, r.arp_dev);
1232 if (dev == NULL)
1215 goto out; 1233 goto out;
1216 1234
1217 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ 1235 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1243,7 +1261,8 @@ out:
1243 return err; 1261 return err;
1244} 1262}
1245 1263
1246static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1264static int arp_netdev_event(struct notifier_block *this, unsigned long event,
1265 void *ptr)
1247{ 1266{
1248 struct net_device *dev = ptr; 1267 struct net_device *dev = ptr;
1249 1268
@@ -1311,12 +1330,13 @@ static char *ax2asc2(ax25_address *a, char *buf)
1311 for (n = 0, s = buf; n < 6; n++) { 1330 for (n = 0, s = buf; n < 6; n++) {
1312 c = (a->ax25_call[n] >> 1) & 0x7F; 1331 c = (a->ax25_call[n] >> 1) & 0x7F;
1313 1332
1314 if (c != ' ') *s++ = c; 1333 if (c != ' ')
1334 *s++ = c;
1315 } 1335 }
1316 1336
1317 *s++ = '-'; 1337 *s++ = '-';
1318 1338 n = (a->ax25_call[6] >> 1) & 0x0F;
1319 if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) { 1339 if (n > 9) {
1320 *s++ = '1'; 1340 *s++ = '1';
1321 n -= 10; 1341 n -= 10;
1322 } 1342 }
@@ -1325,10 +1345,9 @@ static char *ax2asc2(ax25_address *a, char *buf)
1325 *s++ = '\0'; 1345 *s++ = '\0';
1326 1346
1327 if (*buf == '\0' || *buf == '-') 1347 if (*buf == '\0' || *buf == '-')
1328 return "*"; 1348 return "*";
1329 1349
1330 return buf; 1350 return buf;
1331
1332} 1351}
1333#endif /* CONFIG_AX25 */ 1352#endif /* CONFIG_AX25 */
1334 1353
@@ -1408,10 +1427,10 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
1408/* ------------------------------------------------------------------------ */ 1427/* ------------------------------------------------------------------------ */
1409 1428
1410static const struct seq_operations arp_seq_ops = { 1429static const struct seq_operations arp_seq_ops = {
1411 .start = arp_seq_start, 1430 .start = arp_seq_start,
1412 .next = neigh_seq_next, 1431 .next = neigh_seq_next,
1413 .stop = neigh_seq_stop, 1432 .stop = neigh_seq_stop,
1414 .show = arp_seq_show, 1433 .show = arp_seq_show,
1415}; 1434};
1416 1435
1417static int arp_seq_open(struct inode *inode, struct file *file) 1436static int arp_seq_open(struct inode *inode, struct file *file)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 3a92a76ae41d..094e150c6260 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * The CIPSO draft specification can be found in the kernel's Documentation 10 * The CIPSO draft specification can be found in the kernel's Documentation
11 * directory as well as the following URL: 11 * directory as well as the following URL:
12 * http://netlabel.sourceforge.net/files/draft-ietf-cipso-ipsecurity-01.txt 12 * http://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt
13 * The FIPS-188 specification can be found at the following URL: 13 * The FIPS-188 specification can be found at the following URL:
14 * http://www.itl.nist.gov/fipspubs/fip188.htm 14 * http://www.itl.nist.gov/fipspubs/fip188.htm
15 * 15 *
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 721a8a37b45c..174be6caa5c8 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,6 +73,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
73 inet->inet_id = jiffies; 73 inet->inet_id = jiffies;
74 74
75 sk_dst_set(sk, &rt->dst); 75 sk_dst_set(sk, &rt->dst);
76 return(0); 76 return 0;
77} 77}
78EXPORT_SYMBOL(ip4_datagram_connect); 78EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index da14c49284f4..dc94b0316b78 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -209,7 +209,7 @@ static void inetdev_destroy(struct in_device *in_dev)
209 inet_free_ifa(ifa); 209 inet_free_ifa(ifa);
210 } 210 }
211 211
212 dev->ip_ptr = NULL; 212 rcu_assign_pointer(dev->ip_ptr, NULL);
213 213
214 devinet_sysctl_unregister(in_dev); 214 devinet_sysctl_unregister(in_dev);
215 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 215 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -403,6 +403,9 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
403 return inet_insert_ifa(ifa); 403 return inet_insert_ifa(ifa);
404} 404}
405 405
406/* Caller must hold RCU or RTNL :
407 * We don't take a reference on the found in_device
408 */
406struct in_device *inetdev_by_index(struct net *net, int ifindex) 409struct in_device *inetdev_by_index(struct net *net, int ifindex)
407{ 410{
408 struct net_device *dev; 411 struct net_device *dev;
@@ -411,7 +414,7 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex)
411 rcu_read_lock(); 414 rcu_read_lock();
412 dev = dev_get_by_index_rcu(net, ifindex); 415 dev = dev_get_by_index_rcu(net, ifindex);
413 if (dev) 416 if (dev)
414 in_dev = in_dev_get(dev); 417 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
415 rcu_read_unlock(); 418 rcu_read_unlock();
416 return in_dev; 419 return in_dev;
417} 420}
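With the in_dev_get() refcount gone, callers of inetdev_by_index() are expected to hold RCU (or the RTNL) for as long as they use the result, per the comment added above. A hedged usage sketch with a hypothetical caller:

#include <linux/inetdevice.h>

static bool ifindex_has_ipv4_addr(struct net *net, int ifindex)
{
	struct in_device *in_dev;
	bool ret;

	rcu_read_lock();
	in_dev = inetdev_by_index(net, ifindex);
	ret = in_dev && in_dev->ifa_list;	/* no reference was taken */
	rcu_read_unlock();
	return ret;
}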
@@ -453,8 +456,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
453 goto errout; 456 goto errout;
454 } 457 }
455 458
456 __in_dev_put(in_dev);
457
458 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 459 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459 ifap = &ifa->ifa_next) { 460 ifap = &ifa->ifa_next) {
460 if (tb[IFA_LOCAL] && 461 if (tb[IFA_LOCAL] &&
@@ -1059,7 +1060,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1059 switch (event) { 1060 switch (event) {
1060 case NETDEV_REGISTER: 1061 case NETDEV_REGISTER:
1061 printk(KERN_DEBUG "inetdev_event: bug\n"); 1062 printk(KERN_DEBUG "inetdev_event: bug\n");
1062 dev->ip_ptr = NULL; 1063 rcu_assign_pointer(dev->ip_ptr, NULL);
1063 break; 1064 break;
1064 case NETDEV_UP: 1065 case NETDEV_UP:
1065 if (!inetdev_valid_mtu(dev->mtu)) 1066 if (!inetdev_valid_mtu(dev->mtu))
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7d02a9f999fa..eb6f69a8f27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -147,35 +147,43 @@ static void fib_flush(struct net *net)
147 rt_cache_flush(net, -1); 147 rt_cache_flush(net, -1);
148} 148}
149 149
150/* 150/**
151 * Find the first device with a given source address. 151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU, or RTNL
152 */ 157 */
153 158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154struct net_device * ip_dev_find(struct net *net, __be32 addr)
155{ 159{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 160 struct flowi fl = {
157 struct fib_result res; 161 .nl_u = {
162 .ip4_u = {
163 .daddr = addr
164 }
165 },
166 .flags = FLOWI_FLAG_MATCH_ANY_IIF
167 };
168 struct fib_result res = { 0 };
158 struct net_device *dev = NULL; 169 struct net_device *dev = NULL;
159 struct fib_table *local_table;
160 170
161#ifdef CONFIG_IP_MULTIPLE_TABLES 171 rcu_read_lock();
162 res.r = NULL; 172 if (fib_lookup(net, &fl, &res)) {
163#endif 173 rcu_read_unlock();
164
165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
167 return NULL; 174 return NULL;
175 }
168 if (res.type != RTN_LOCAL) 176 if (res.type != RTN_LOCAL)
169 goto out; 177 goto out;
170 dev = FIB_RES_DEV(res); 178 dev = FIB_RES_DEV(res);
171 179
172 if (dev) 180 if (dev && devref)
173 dev_hold(dev); 181 dev_hold(dev);
174out: 182out:
175 fib_res_put(&res); 183 rcu_read_unlock();
176 return dev; 184 return dev;
177} 185}
178EXPORT_SYMBOL(ip_dev_find); 186EXPORT_SYMBOL(__ip_dev_find);
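The devref flag gives callers two disciplines: a refcounted lookup (the old ip_dev_find() behaviour, presumably kept as a devref=true wrapper) and a lockless one under RCU where no dev_hold() is taken. A sketch of the RCU-side caller, with an illustrative name:

#include <linux/inetdevice.h>

static bool addr_is_ours(struct net *net, __be32 addr)
{
	struct net_device *dev;
	bool ret;

	rcu_read_lock();
	dev = __ip_dev_find(net, addr, false);	/* no dev_hold() taken */
	ret = dev != NULL;
	rcu_read_unlock();
	return ret;
}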
179 187
180/* 188/*
181 * Find address type as if only "dev" was present in the system. If 189 * Find address type as if only "dev" was present in the system. If
@@ -202,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
202 local_table = fib_get_table(net, RT_TABLE_LOCAL); 210 local_table = fib_get_table(net, RT_TABLE_LOCAL);
203 if (local_table) { 211 if (local_table) {
204 ret = RTN_UNICAST; 212 ret = RTN_UNICAST;
205 if (!fib_table_lookup(local_table, &fl, &res)) { 213 rcu_read_lock();
214 if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
206 if (!dev || dev == res.fi->fib_dev) 215 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type; 216 ret = res.type;
208 fib_res_put(&res);
209 } 217 }
218 rcu_read_unlock();
210 } 219 }
211 return ret; 220 return ret;
212} 221}
@@ -220,30 +229,34 @@ EXPORT_SYMBOL(inet_addr_type);
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 229unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr) 230 __be32 addr)
222{ 231{
223 return __inet_dev_addr_type(net, dev, addr); 232 return __inet_dev_addr_type(net, dev, addr);
224} 233}
225EXPORT_SYMBOL(inet_dev_addr_type); 234EXPORT_SYMBOL(inet_dev_addr_type);
226 235
227/* Given (packet source, input interface) and optional (dst, oif, tos): 236/* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local 237 * - (main) check, that source is valid i.e. not broadcast or our local
229 address. 238 * address.
230 - figure out what "logical" interface this packet arrived 239 * - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address. 240 * and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface. 241 * - check, that packet arrived from expected physical interface.
242 * called with rcu_read_lock()
233 */ 243 */
234
235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 244int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
236 struct net_device *dev, __be32 *spec_dst, 245 struct net_device *dev, __be32 *spec_dst,
237 u32 *itag, u32 mark) 246 u32 *itag, u32 mark)
238{ 247{
239 struct in_device *in_dev; 248 struct in_device *in_dev;
240 struct flowi fl = { .nl_u = { .ip4_u = 249 struct flowi fl = {
241 { .daddr = src, 250 .nl_u = {
242 .saddr = dst, 251 .ip4_u = {
243 .tos = tos } }, 252 .daddr = src,
244 .mark = mark, 253 .saddr = dst,
245 .iif = oif }; 254 .tos = tos
246 255 }
256 },
257 .mark = mark,
258 .iif = oif
259 };
247 struct fib_result res; 260 struct fib_result res;
248 int no_addr, rpf, accept_local; 261 int no_addr, rpf, accept_local;
249 bool dev_match; 262 bool dev_match;
@@ -251,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
251 struct net *net; 264 struct net *net;
252 265
253 no_addr = rpf = accept_local = 0; 266 no_addr = rpf = accept_local = 0;
254 rcu_read_lock();
255 in_dev = __in_dev_get_rcu(dev); 267 in_dev = __in_dev_get_rcu(dev);
256 if (in_dev) { 268 if (in_dev) {
257 no_addr = in_dev->ifa_list == NULL; 269 no_addr = in_dev->ifa_list == NULL;
@@ -260,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
260 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 272 if (mark && !IN_DEV_SRC_VMARK(in_dev))
261 fl.mark = 0; 273 fl.mark = 0;
262 } 274 }
263 rcu_read_unlock();
264 275
265 if (in_dev == NULL) 276 if (in_dev == NULL)
266 goto e_inval; 277 goto e_inval;
@@ -270,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
270 goto last_resort; 281 goto last_resort;
271 if (res.type != RTN_UNICAST) { 282 if (res.type != RTN_UNICAST) {
272 if (res.type != RTN_LOCAL || !accept_local) 283 if (res.type != RTN_LOCAL || !accept_local)
273 goto e_inval_res; 284 goto e_inval;
274 } 285 }
275 *spec_dst = FIB_RES_PREFSRC(res); 286 *spec_dst = FIB_RES_PREFSRC(res);
276 fib_combine_itag(itag, &res); 287 fib_combine_itag(itag, &res);
@@ -291,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
291#endif 302#endif
292 if (dev_match) { 303 if (dev_match) {
293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 304 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294 fib_res_put(&res);
295 return ret; 305 return ret;
296 } 306 }
297 fib_res_put(&res);
298 if (no_addr) 307 if (no_addr)
299 goto last_resort; 308 goto last_resort;
300 if (rpf == 1) 309 if (rpf == 1)
@@ -307,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
307 *spec_dst = FIB_RES_PREFSRC(res); 316 *spec_dst = FIB_RES_PREFSRC(res);
308 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 317 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
309 } 318 }
310 fib_res_put(&res);
311 } 319 }
312 return ret; 320 return ret;
313 321
@@ -318,8 +326,6 @@ last_resort:
318 *itag = 0; 326 *itag = 0;
319 return 0; 327 return 0;
320 328
321e_inval_res:
322 fib_res_put(&res);
323e_inval: 329e_inval:
324 return -EINVAL; 330 return -EINVAL;
325e_rpf: 331e_rpf:
@@ -472,9 +478,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
472} 478}
473 479
474/* 480/*
475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 481 * Handle IP routing ioctl calls.
482 * These are used to manipulate the routing tables
476 */ 483 */
477
478int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 484int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
479{ 485{
480 struct fib_config cfg; 486 struct fib_config cfg;
@@ -518,7 +524,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
518 return -EINVAL; 524 return -EINVAL;
519} 525}
520 526
521const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 527const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
522 [RTA_DST] = { .type = NLA_U32 }, 528 [RTA_DST] = { .type = NLA_U32 },
523 [RTA_SRC] = { .type = NLA_U32 }, 529 [RTA_SRC] = { .type = NLA_U32 },
524 [RTA_IIF] = { .type = NLA_U32 }, 530 [RTA_IIF] = { .type = NLA_U32 },
@@ -532,7 +538,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
532}; 538};
533 539
534static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 540static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
535 struct nlmsghdr *nlh, struct fib_config *cfg) 541 struct nlmsghdr *nlh, struct fib_config *cfg)
536{ 542{
537 struct nlattr *attr; 543 struct nlattr *attr;
538 int err, remaining; 544 int err, remaining;
@@ -687,12 +693,11 @@ out:
687} 693}
688 694
689/* Prepare and feed intra-kernel routing request. 695/* Prepare and feed intra-kernel routing request.
690 Really, it should be netlink message, but :-( netlink 696 * Really, it should be netlink message, but :-( netlink
691 can be not configured, so that we feed it directly 697 * can be not configured, so that we feed it directly
692 to fib engine. It is legal, because all events occur 698 * to fib engine. It is legal, because all events occur
693 only when netlink is already locked. 699 * only when netlink is already locked.
694 */ 700 */
695
696static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 701static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
697{ 702{
698 struct net *net = dev_net(ifa->ifa_dev->dev); 703 struct net *net = dev_net(ifa->ifa_dev->dev);
@@ -738,9 +743,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
738 struct in_ifaddr *prim = ifa; 743 struct in_ifaddr *prim = ifa;
739 __be32 mask = ifa->ifa_mask; 744 __be32 mask = ifa->ifa_mask;
740 __be32 addr = ifa->ifa_local; 745 __be32 addr = ifa->ifa_local;
741 __be32 prefix = ifa->ifa_address&mask; 746 __be32 prefix = ifa->ifa_address & mask;
742 747
743 if (ifa->ifa_flags&IFA_F_SECONDARY) { 748 if (ifa->ifa_flags & IFA_F_SECONDARY) {
744 prim = inet_ifa_byprefix(in_dev, prefix, mask); 749 prim = inet_ifa_byprefix(in_dev, prefix, mask);
745 if (prim == NULL) { 750 if (prim == NULL) {
746 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 751 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
@@ -750,22 +755,24 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
750 755
751 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 756 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
752 757
753 if (!(dev->flags&IFF_UP)) 758 if (!(dev->flags & IFF_UP))
754 return; 759 return;
755 760
756 /* Add broadcast address, if it is explicitly assigned. */ 761 /* Add broadcast address, if it is explicitly assigned. */
757 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 762 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
758 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 763 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
759 764
760 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 765 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
761 (prefix != addr || ifa->ifa_prefixlen < 32)) { 766 (prefix != addr || ifa->ifa_prefixlen < 32)) {
762 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 767 fib_magic(RTM_NEWROUTE,
763 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 768 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
769 prefix, ifa->ifa_prefixlen, prim);
764 770
765 /* Add network specific broadcasts, when it takes a sense */ 771 /* Add network specific broadcasts, when it takes a sense */
766 if (ifa->ifa_prefixlen < 31) { 772 if (ifa->ifa_prefixlen < 31) {
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 773 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
768 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 774 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
775 32, prim);
769 } 776 }
770 } 777 }
771} 778}
@@ -776,17 +783,18 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
776 struct net_device *dev = in_dev->dev; 783 struct net_device *dev = in_dev->dev;
777 struct in_ifaddr *ifa1; 784 struct in_ifaddr *ifa1;
778 struct in_ifaddr *prim = ifa; 785 struct in_ifaddr *prim = ifa;
779 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 786 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
780 __be32 any = ifa->ifa_address&ifa->ifa_mask; 787 __be32 any = ifa->ifa_address & ifa->ifa_mask;
781#define LOCAL_OK 1 788#define LOCAL_OK 1
782#define BRD_OK 2 789#define BRD_OK 2
783#define BRD0_OK 4 790#define BRD0_OK 4
784#define BRD1_OK 8 791#define BRD1_OK 8
785 unsigned ok = 0; 792 unsigned ok = 0;
786 793
787 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 794 if (!(ifa->ifa_flags & IFA_F_SECONDARY))
788 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 795 fib_magic(RTM_DELROUTE,
789 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 796 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
797 any, ifa->ifa_prefixlen, prim);
790 else { 798 else {
791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 799 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
792 if (prim == NULL) { 800 if (prim == NULL) {
@@ -796,9 +804,9 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
796 } 804 }
797 805
798 /* Deletion is more complicated than add. 806 /* Deletion is more complicated than add.
799 We should take care of not to delete too much :-) 807 * We should take care of not to delete too much :-)
800 808 *
801 Scan address list to be sure that addresses are really gone. 809 * Scan address list to be sure that addresses are really gone.
802 */ 810 */
803 811
804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 812 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
@@ -812,23 +820,23 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
812 ok |= BRD0_OK; 820 ok |= BRD0_OK;
813 } 821 }
814 822
815 if (!(ok&BRD_OK)) 823 if (!(ok & BRD_OK))
816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 824 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
817 if (!(ok&BRD1_OK)) 825 if (!(ok & BRD1_OK))
818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 826 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
819 if (!(ok&BRD0_OK)) 827 if (!(ok & BRD0_OK))
820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 828 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
821 if (!(ok&LOCAL_OK)) { 829 if (!(ok & LOCAL_OK)) {
822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 830 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
823 831
824 /* Check, that this local address finally disappeared. */ 832 /* Check, that this local address finally disappeared. */
825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 833 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
826 /* And the last, but not the least thing. 834 /* And the last, but not the least thing.
827 We must flush stray FIB entries. 835 * We must flush stray FIB entries.
828 836 *
829 First of all, we scan fib_info list searching 837 * First of all, we scan fib_info list searching
830 for stray nexthop entries, then ignite fib_flush. 838 * for stray nexthop entries, then ignite fib_flush.
831 */ 839 */
832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) 840 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
833 fib_flush(dev_net(dev)); 841 fib_flush(dev_net(dev));
834 } 842 }
@@ -839,14 +847,20 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
839#undef BRD1_OK 847#undef BRD1_OK
840} 848}
841 849
842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 850static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
843{ 851{
844 852
845 struct fib_result res; 853 struct fib_result res;
846 struct flowi fl = { .mark = frn->fl_mark, 854 struct flowi fl = {
847 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 855 .mark = frn->fl_mark,
848 .tos = frn->fl_tos, 856 .nl_u = {
849 .scope = frn->fl_scope } } }; 857 .ip4_u = {
858 .daddr = frn->fl_addr,
859 .tos = frn->fl_tos,
860 .scope = frn->fl_scope
861 }
862 }
863 };
850 864
851#ifdef CONFIG_IP_MULTIPLE_TABLES 865#ifdef CONFIG_IP_MULTIPLE_TABLES
852 res.r = NULL; 866 res.r = NULL;
@@ -857,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
857 local_bh_disable(); 871 local_bh_disable();
858 872
859 frn->tb_id = tb->tb_id; 873 frn->tb_id = tb->tb_id;
860 frn->err = fib_table_lookup(tb, &fl, &res); 874 rcu_read_lock();
875 frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF);
861 876
862 if (!frn->err) { 877 if (!frn->err) {
863 frn->prefixlen = res.prefixlen; 878 frn->prefixlen = res.prefixlen;
864 frn->nh_sel = res.nh_sel; 879 frn->nh_sel = res.nh_sel;
865 frn->type = res.type; 880 frn->type = res.type;
866 frn->scope = res.scope; 881 frn->scope = res.scope;
867 fib_res_put(&res);
868 } 882 }
883 rcu_read_unlock();
869 local_bh_enable(); 884 local_bh_enable();
870 } 885 }
871} 886}
@@ -894,8 +909,8 @@ static void nl_fib_input(struct sk_buff *skb)
894 909
895 nl_fib_lookup(frn, tb); 910 nl_fib_lookup(frn, tb);
896 911
897 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 912 pid = NETLINK_CB(skb).pid; /* pid of sending process */
898 NETLINK_CB(skb).pid = 0; /* from kernel */ 913 NETLINK_CB(skb).pid = 0; /* from kernel */
899 NETLINK_CB(skb).dst_group = 0; /* unicast */ 914 NETLINK_CB(skb).dst_group = 0; /* unicast */
900 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 915 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
901} 916}
@@ -942,7 +957,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
942 fib_del_ifaddr(ifa); 957 fib_del_ifaddr(ifa);
943 if (ifa->ifa_dev->ifa_list == NULL) { 958 if (ifa->ifa_dev->ifa_list == NULL) {
944 /* Last address was deleted from this interface. 959 /* Last address was deleted from this interface.
945 Disable IP. 960 * Disable IP.
946 */ 961 */
947 fib_disable_ip(dev, 1, 0); 962 fib_disable_ip(dev, 1, 0);
948 } else { 963 } else {
@@ -1001,16 +1016,15 @@ static struct notifier_block fib_netdev_notifier = {
1001static int __net_init ip_fib_net_init(struct net *net) 1016static int __net_init ip_fib_net_init(struct net *net)
1002{ 1017{
1003 int err; 1018 int err;
1004 unsigned int i; 1019 size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
1005 1020
1006 net->ipv4.fib_table_hash = kzalloc( 1021 /* Avoid false sharing : Use at least a full cache line */
1007 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 1022 size = max_t(size_t, size, L1_CACHE_BYTES);
1023
1024 net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
1008 if (net->ipv4.fib_table_hash == NULL) 1025 if (net->ipv4.fib_table_hash == NULL)
1009 return -ENOMEM; 1026 return -ENOMEM;
1010 1027
1011 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
1012 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
1013
1014 err = fib4_rules_init(net); 1028 err = fib4_rules_init(net);
1015 if (err < 0) 1029 if (err < 0)
1016 goto fail; 1030 goto fail;
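The ip_fib_net_init() hunk above drops the INIT_HLIST_HEAD() loop -- kzalloc() already returns zeroed memory, and an all-zero hlist_head is a valid empty list -- and rounds the allocation up to at least one cache line so the small table cannot false-share its line with neighbouring write-hot data. A minimal userspace sketch of the sizing rule; the 64-byte line size and the two-bucket table are assumptions for the demo (the kernel takes both from the architecture and config):

#include <stdio.h>
#include <stdlib.h>

#define L1_CACHE_BYTES   64   /* assumed line size; the kernel uses the arch value */
#define FIB_TABLE_HASHSZ 2    /* assumed bucket count for the demo */

struct hlist_head { void *first; };   /* stand-in for the kernel type */

int main(void)
{
	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;

	/* Round up so the hash table owns at least one full cache line. */
	if (size < L1_CACHE_BYTES)
		size = L1_CACHE_BYTES;

	/* calloc() returns zeroed memory, and an all-zero hlist_head is
	 * an empty list -- which is why the init loop could go away. */
	struct hlist_head *hash = calloc(1, size);

	if (!hash)
		return 1;
	printf("allocated %zu bytes for %d buckets\n", size, FIB_TABLE_HASHSZ);
	free(hash);
	return 0;
}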
@@ -1038,7 +1052,7 @@ static void ip_fib_net_exit(struct net *net)
1038 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1052 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1039 hlist_del(node); 1053 hlist_del(node);
1040 fib_table_flush(tb); 1054 fib_table_flush(tb);
1041 kfree(tb); 1055 fib_free_table(tb);
1042 } 1056 }
1043 } 1057 }
1044 kfree(net->ipv4.fib_table_hash); 1058 kfree(net->ipv4.fib_table_hash);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 4ed7e0dea1bc..b3acb0417b21 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -54,36 +54,37 @@ struct fib_node {
54 struct fib_alias fn_embedded_alias; 54 struct fib_alias fn_embedded_alias;
55}; 55};
56 56
57struct fn_zone { 57#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
58 struct fn_zone *fz_next; /* Next not empty zone */
59 struct hlist_head *fz_hash; /* Hash table pointer */
60 int fz_nent; /* Number of entries */
61 58
62 int fz_divisor; /* Hash divisor */ 59struct fn_zone {
60 struct fn_zone __rcu *fz_next; /* Next not empty zone */
61 struct hlist_head __rcu *fz_hash; /* Hash table pointer */
62 seqlock_t fz_lock;
63 u32 fz_hashmask; /* (fz_divisor - 1) */ 63 u32 fz_hashmask; /* (fz_divisor - 1) */
64#define FZ_HASHMASK(fz) ((fz)->fz_hashmask)
65 64
66 int fz_order; /* Zone order */ 65 u8 fz_order; /* Zone order (0..32) */
67 __be32 fz_mask; 66 u8 fz_revorder; /* 32 - fz_order */
67 __be32 fz_mask; /* inet_make_mask(order) */
68#define FZ_MASK(fz) ((fz)->fz_mask) 68#define FZ_MASK(fz) ((fz)->fz_mask)
69};
70 69
71/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask 70 struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE];
72 * can be cheaper than memory lookup, so that FZ_* macros are used. 71
73 */ 72 int fz_nent; /* Number of entries */
73 int fz_divisor; /* Hash size (mask+1) */
74};
74 75
75struct fn_hash { 76struct fn_hash {
76 struct fn_zone *fn_zones[33]; 77 struct fn_zone *fn_zones[33];
77 struct fn_zone *fn_zone_list; 78 struct fn_zone __rcu *fn_zone_list;
78}; 79};
79 80
80static inline u32 fn_hash(__be32 key, struct fn_zone *fz) 81static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
81{ 82{
82 u32 h = ntohl(key)>>(32 - fz->fz_order); 83 u32 h = ntohl(key) >> fz->fz_revorder;
83 h ^= (h>>20); 84 h ^= (h>>20);
84 h ^= (h>>10); 85 h ^= (h>>10);
85 h ^= (h>>5); 86 h ^= (h>>5);
86 h &= FZ_HASHMASK(fz); 87 h &= fz->fz_hashmask;
87 return h; 88 return h;
88} 89}
89 90
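fn_hash() now reads a precomputed fz_revorder (32 - fz_order) instead of subtracting at lookup time, xor-folds the remaining bits downward, and masks with fz_hashmask -- which only works while fz_divisor stays a power of two. A standalone model of the same fold; the 8-bucket embedded size is an assumption (a 64-byte line divided by 8-byte list heads):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define EMBEDDED_HASH_SIZE 8   /* assumption: L1_CACHE_BYTES 64 / 8-byte hlist_head */

struct fn_zone {
	uint8_t  fz_order;     /* prefix length, 0..32 */
	uint8_t  fz_revorder;  /* 32 - fz_order, precomputed */
	uint32_t fz_hashmask;  /* fz_divisor - 1 */
};

/* Same fold as the patched fn_hash(): drop host bits, mix, mask. */
static uint32_t fn_hash(uint32_t key_be, const struct fn_zone *fz)
{
	uint32_t h = ntohl(key_be) >> fz->fz_revorder;

	h ^= h >> 20;
	h ^= h >> 10;
	h ^= h >> 5;
	return h & fz->fz_hashmask;
}

int main(void)
{
	struct fn_zone fz = {
		.fz_order    = 24,
		.fz_revorder = 32 - 24,
		.fz_hashmask = EMBEDDED_HASH_SIZE - 1,
	};
	uint32_t key = htonl(0xc0a80100);   /* 192.168.1.0/24 */

	printf("bucket = %u\n", (unsigned)fn_hash(key, &fz));
	return 0;
}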
@@ -92,7 +93,6 @@ static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
92 return dst & FZ_MASK(fz); 93 return dst & FZ_MASK(fz);
93} 94}
94 95
95static DEFINE_RWLOCK(fib_hash_lock);
96static unsigned int fib_hash_genid; 96static unsigned int fib_hash_genid;
97 97
98#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head)) 98#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
@@ -101,12 +101,11 @@ static struct hlist_head *fz_hash_alloc(int divisor)
101{ 101{
102 unsigned long size = divisor * sizeof(struct hlist_head); 102 unsigned long size = divisor * sizeof(struct hlist_head);
103 103
104 if (size <= PAGE_SIZE) { 104 if (size <= PAGE_SIZE)
105 return kzalloc(size, GFP_KERNEL); 105 return kzalloc(size, GFP_KERNEL);
106 } else { 106
107 return (struct hlist_head *) 107 return (struct hlist_head *)
108 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size)); 108 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
109 }
110} 109}
111 110
112/* The fib hash lock must be held when this is called. */ 111/* The fib hash lock must be held when this is called. */
@@ -123,10 +122,11 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
123 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { 122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
124 struct hlist_head *new_head; 123 struct hlist_head *new_head;
125 124
126 hlist_del(&f->fn_hash); 125 hlist_del_rcu(&f->fn_hash);
127 126
128 new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 127 new_head = rcu_dereference_protected(fz->fz_hash, 1) +
129 hlist_add_head(&f->fn_hash, new_head); 128 fn_hash(f->fn_key, fz);
129 hlist_add_head_rcu(&f->fn_hash, new_head);
130 } 130 }
131 } 131 }
132} 132}
@@ -147,14 +147,14 @@ static void fn_rehash_zone(struct fn_zone *fz)
147 int old_divisor, new_divisor; 147 int old_divisor, new_divisor;
148 u32 new_hashmask; 148 u32 new_hashmask;
149 149
150 old_divisor = fz->fz_divisor; 150 new_divisor = old_divisor = fz->fz_divisor;
151 151
152 switch (old_divisor) { 152 switch (old_divisor) {
153 case 16: 153 case EMBEDDED_HASH_SIZE:
154 new_divisor = 256; 154 new_divisor *= EMBEDDED_HASH_SIZE;
155 break; 155 break;
156 case 256: 156 case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
157 new_divisor = 1024; 157 new_divisor *= (EMBEDDED_HASH_SIZE/2);
158 break; 158 break;
159 default: 159 default:
160 if ((old_divisor << 1) > FZ_MAX_DIVISOR) { 160 if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
@@ -175,31 +175,55 @@ static void fn_rehash_zone(struct fn_zone *fz)
175 ht = fz_hash_alloc(new_divisor); 175 ht = fz_hash_alloc(new_divisor);
176 176
177 if (ht) { 177 if (ht) {
178 write_lock_bh(&fib_hash_lock); 178 struct fn_zone nfz;
179 old_ht = fz->fz_hash; 179
180 fz->fz_hash = ht; 180 memcpy(&nfz, fz, sizeof(nfz));
181
182 write_seqlock_bh(&fz->fz_lock);
183 old_ht = rcu_dereference_protected(fz->fz_hash, 1);
184 RCU_INIT_POINTER(nfz.fz_hash, ht);
185 nfz.fz_hashmask = new_hashmask;
186 nfz.fz_divisor = new_divisor;
187 fn_rebuild_zone(&nfz, old_ht, old_divisor);
188 fib_hash_genid++;
189 rcu_assign_pointer(fz->fz_hash, ht);
181 fz->fz_hashmask = new_hashmask; 190 fz->fz_hashmask = new_hashmask;
182 fz->fz_divisor = new_divisor; 191 fz->fz_divisor = new_divisor;
183 fn_rebuild_zone(fz, old_ht, old_divisor); 192 write_sequnlock_bh(&fz->fz_lock);
184 fib_hash_genid++;
185 write_unlock_bh(&fib_hash_lock);
186 193
187 fz_hash_free(old_ht, old_divisor); 194 if (old_ht != fz->fz_embedded_hash) {
195 synchronize_rcu();
196 fz_hash_free(old_ht, old_divisor);
197 }
188 } 198 }
189} 199}
190 200
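fn_rehash_zone() above replaces the global rwlock with a publish-then-free scheme: the resized table is wired up in a private copy (nfz), published under write_seqlock_bh(), and the old array is only freed after synchronize_rcu() -- and only if it was not the embedded one. Lookups pair with this through read_seqbegin()/read_seqretry(), so a reader racing a rehash simply retries. A single-threaded userspace model of the retry protocol; it deliberately elides the memory barriers and writer serialization the kernel's seqlock_t provides, so treat it as shape, not a production seqlock:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned seq;   /* even = stable, odd = write in progress */
static int data_a, data_b;     /* state the writer must update together */

static void write_update(int a, int b)
{
	atomic_fetch_add_explicit(&seq, 1, memory_order_acquire); /* -> odd */
	data_a = a;
	data_b = b;
	atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* -> even */
}

static void read_consistent(int *a, int *b)
{
	unsigned s;

	do {
		/* read_seqbegin(): wait for an even count, sample it */
		do {
			s = atomic_load_explicit(&seq, memory_order_acquire);
		} while (s & 1);

		*a = data_a;
		*b = data_b;

		/* read_seqretry(): redo the read if a writer ran meanwhile */
	} while (atomic_load_explicit(&seq, memory_order_acquire) != s);
}

int main(void)
{
	int a, b;

	write_update(1, 2);
	read_consistent(&a, &b);
	printf("a=%d b=%d\n", a, b);
	return 0;
}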
191static inline void fn_free_node(struct fib_node * f) 201static void fn_free_node_rcu(struct rcu_head *head)
192{ 202{
203 struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
204
193 kmem_cache_free(fn_hash_kmem, f); 205 kmem_cache_free(fn_hash_kmem, f);
194} 206}
195 207
208static inline void fn_free_node(struct fib_node *f)
209{
210 call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
211}
212
213static void fn_free_alias_rcu(struct rcu_head *head)
214{
215 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
216
217 kmem_cache_free(fn_alias_kmem, fa);
218}
219
196static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f) 220static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
197{ 221{
198 fib_release_info(fa->fa_info); 222 fib_release_info(fa->fa_info);
199 if (fa == &f->fn_embedded_alias) 223 if (fa == &f->fn_embedded_alias)
200 fa->fa_info = NULL; 224 fa->fa_info = NULL;
201 else 225 else
202 kmem_cache_free(fn_alias_kmem, fa); 226 call_rcu(&fa->rcu, fn_free_alias_rcu);
203} 227}
204 228
205static struct fn_zone * 229static struct fn_zone *
@@ -210,68 +234,71 @@ fn_new_zone(struct fn_hash *table, int z)
210 if (!fz) 234 if (!fz)
211 return NULL; 235 return NULL;
212 236
213 if (z) { 237 seqlock_init(&fz->fz_lock);
214 fz->fz_divisor = 16; 238 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
215 } else { 239 fz->fz_hashmask = fz->fz_divisor - 1;
216 fz->fz_divisor = 1; 240 RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash);
217 }
218 fz->fz_hashmask = (fz->fz_divisor - 1);
219 fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
220 if (!fz->fz_hash) {
221 kfree(fz);
222 return NULL;
223 }
224 fz->fz_order = z; 241 fz->fz_order = z;
242 fz->fz_revorder = 32 - z;
225 fz->fz_mask = inet_make_mask(z); 243 fz->fz_mask = inet_make_mask(z);
226 244
227 /* Find the first not empty zone with more specific mask */ 245 /* Find the first not empty zone with more specific mask */
228 for (i=z+1; i<=32; i++) 246 for (i = z + 1; i <= 32; i++)
229 if (table->fn_zones[i]) 247 if (table->fn_zones[i])
230 break; 248 break;
231 write_lock_bh(&fib_hash_lock); 249 if (i > 32) {
232 if (i>32) {
233 /* No more specific masks, we are the first. */ 250 /* No more specific masks, we are the first. */
234 fz->fz_next = table->fn_zone_list; 251 rcu_assign_pointer(fz->fz_next,
235 table->fn_zone_list = fz; 252 rtnl_dereference(table->fn_zone_list));
253 rcu_assign_pointer(table->fn_zone_list, fz);
236 } else { 254 } else {
237 fz->fz_next = table->fn_zones[i]->fz_next; 255 rcu_assign_pointer(fz->fz_next,
238 table->fn_zones[i]->fz_next = fz; 256 rtnl_dereference(table->fn_zones[i]->fz_next));
257 rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
239 } 258 }
240 table->fn_zones[z] = fz; 259 table->fn_zones[z] = fz;
241 fib_hash_genid++; 260 fib_hash_genid++;
242 write_unlock_bh(&fib_hash_lock);
243 return fz; 261 return fz;
244} 262}
245 263
246int fib_table_lookup(struct fib_table *tb, 264int fib_table_lookup(struct fib_table *tb,
247 const struct flowi *flp, struct fib_result *res) 265 const struct flowi *flp, struct fib_result *res,
266 int fib_flags)
248{ 267{
249 int err; 268 int err;
250 struct fn_zone *fz; 269 struct fn_zone *fz;
251 struct fn_hash *t = (struct fn_hash *)tb->tb_data; 270 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
252 271
253 read_lock(&fib_hash_lock); 272 rcu_read_lock();
254 for (fz = t->fn_zone_list; fz; fz = fz->fz_next) { 273 for (fz = rcu_dereference(t->fn_zone_list);
274 fz != NULL;
275 fz = rcu_dereference(fz->fz_next)) {
255 struct hlist_head *head; 276 struct hlist_head *head;
256 struct hlist_node *node; 277 struct hlist_node *node;
257 struct fib_node *f; 278 struct fib_node *f;
258 __be32 k = fz_key(flp->fl4_dst, fz); 279 __be32 k;
280 unsigned int seq;
259 281
260 head = &fz->fz_hash[fn_hash(k, fz)]; 282 do {
261 hlist_for_each_entry(f, node, head, fn_hash) { 283 seq = read_seqbegin(&fz->fz_lock);
262 if (f->fn_key != k) 284 k = fz_key(flp->fl4_dst, fz);
263 continue; 285
286 head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
287 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
288 if (f->fn_key != k)
289 continue;
264 290
265 err = fib_semantic_match(&f->fn_alias, 291 err = fib_semantic_match(&f->fn_alias,
266 flp, res, 292 flp, res,
267 fz->fz_order); 293 fz->fz_order, fib_flags);
268 if (err <= 0) 294 if (err <= 0)
269 goto out; 295 goto out;
270 } 296 }
297 } while (read_seqretry(&fz->fz_lock, seq));
271 } 298 }
272 err = 1; 299 err = 1;
273out: 300out:
274 read_unlock(&fib_hash_lock); 301 rcu_read_unlock();
275 return err; 302 return err;
276} 303}
277 304
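The rewritten fib_table_lookup() keeps the longest-prefix-match shape intact: fn_zone_list is ordered from the most specific non-empty zone down to /0, so the first zone that yields a semantic match wins; only the synchronization changed (an RCU walk with a per-zone seqlock retry instead of read_lock(&fib_hash_lock)). A toy model of match-by-zone-order, with the hashing and RCU machinery elided and the sample routes invented for the demo:

#include <stdio.h>
#include <stdint.h>

struct zone {
	int      order;   /* prefix length */
	uint32_t prefix;  /* network, host byte order for simplicity */
};

static uint32_t make_mask(int order)
{
	return order ? ~0u << (32 - order) : 0;
}

/* Zones sorted most-specific first, like fn_zone_list. */
static const struct zone zones[] = {
	{ 24, 0xc0a80100 },  /* 192.168.1.0/24 */
	{ 16, 0xc0a80000 },  /* 192.168.0.0/16 */
	{  0, 0x00000000 },  /* default route  */
};

static int lookup(uint32_t dst)
{
	for (unsigned i = 0; i < sizeof(zones) / sizeof(zones[0]); i++) {
		/* fz_key(): keep only the bits this zone cares about */
		uint32_t key = dst & make_mask(zones[i].order);

		if (key == zones[i].prefix)
			return zones[i].order;  /* first hit = longest match */
	}
	return -1;
}

int main(void)
{
	printf("/%d\n", lookup(0xc0a80142));  /* 192.168.1.66  -> /24 */
	printf("/%d\n", lookup(0xc0a8c801));  /* 192.168.200.1 -> /16 */
	printf("/%d\n", lookup(0x08080808));  /* 8.8.8.8       -> /0  */
	return 0;
}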
@@ -285,6 +312,7 @@ void fib_table_select_default(struct fib_table *tb,
285 struct fib_info *last_resort; 312 struct fib_info *last_resort;
286 struct fn_hash *t = (struct fn_hash *)tb->tb_data; 313 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
287 struct fn_zone *fz = t->fn_zones[0]; 314 struct fn_zone *fz = t->fn_zones[0];
315 struct hlist_head *head;
288 316
289 if (fz == NULL) 317 if (fz == NULL)
290 return; 318 return;
@@ -293,11 +321,12 @@ void fib_table_select_default(struct fib_table *tb,
293 last_resort = NULL; 321 last_resort = NULL;
294 order = -1; 322 order = -1;
295 323
296 read_lock(&fib_hash_lock); 324 rcu_read_lock();
297 hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) { 325 head = rcu_dereference(fz->fz_hash);
326 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
298 struct fib_alias *fa; 327 struct fib_alias *fa;
299 328
300 list_for_each_entry(fa, &f->fn_alias, fa_list) { 329 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
301 struct fib_info *next_fi = fa->fa_info; 330 struct fib_info *next_fi = fa->fa_info;
302 331
303 if (fa->fa_scope != res->scope || 332 if (fa->fa_scope != res->scope ||
@@ -309,7 +338,8 @@ void fib_table_select_default(struct fib_table *tb,
309 if (!next_fi->fib_nh[0].nh_gw || 338 if (!next_fi->fib_nh[0].nh_gw ||
310 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 339 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
311 continue; 340 continue;
312 fa->fa_state |= FA_S_ACCESSED; 341
342 fib_alias_accessed(fa);
313 343
314 if (fi == NULL) { 344 if (fi == NULL) {
315 if (next_fi != res->fi) 345 if (next_fi != res->fi)
@@ -341,25 +371,25 @@ void fib_table_select_default(struct fib_table *tb,
341 fib_result_assign(res, last_resort); 371 fib_result_assign(res, last_resort);
342 tb->tb_default = last_idx; 372 tb->tb_default = last_idx;
343out: 373out:
344 read_unlock(&fib_hash_lock); 374 rcu_read_unlock();
345} 375}
346 376
347/* Insert node F to FZ. */ 377/* Insert node F to FZ. */
348static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) 378static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
349{ 379{
350 struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 380 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz);
351 381
352 hlist_add_head(&f->fn_hash, head); 382 hlist_add_head_rcu(&f->fn_hash, head);
353} 383}
354 384
355/* Return the node in FZ matching KEY. */ 385/* Return the node in FZ matching KEY. */
356static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) 386static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
357{ 387{
358 struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)]; 388 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz);
359 struct hlist_node *node; 389 struct hlist_node *node;
360 struct fib_node *f; 390 struct fib_node *f;
361 391
362 hlist_for_each_entry(f, node, head, fn_hash) { 392 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
363 if (f->fn_key == key) 393 if (f->fn_key == key)
364 return f; 394 return f;
365 } 395 }
@@ -367,6 +397,17 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
367 return NULL; 397 return NULL;
368} 398}
369 399
400
401static struct fib_alias *fib_fast_alloc(struct fib_node *f)
402{
403 struct fib_alias *fa = &f->fn_embedded_alias;
404
405 if (fa->fa_info != NULL)
406 fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
407 return fa;
408}
409
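fib_fast_alloc() names a trick the insert path already used inline: the first alias for a prefix lives inside the fib_node itself (fn_embedded_alias, marked free by fa_info == NULL), so the common single-route-per-prefix case needs no slab allocation. A userspace sketch of the embedded-slot-first pattern, with malloc()/free() standing in for the slab cache and the RCU-deferred free:

#include <stdio.h>
#include <stdlib.h>

struct fib_alias {
	void *fa_info;   /* NULL means "slot unused" */
};

struct fib_node {
	struct fib_alias fn_embedded_alias;  /* first alias lives inline */
	/* key, hash linkage, alias list head would follow in the kernel */
};

/* Hand out the embedded slot first, heap-allocate only for extras. */
static struct fib_alias *fib_fast_alloc(struct fib_node *f)
{
	struct fib_alias *fa = &f->fn_embedded_alias;

	if (fa->fa_info != NULL)             /* embedded slot already taken */
		fa = malloc(sizeof(*fa));    /* kernel: kmem_cache_alloc() */
	return fa;
}

static void fib_fast_free(struct fib_node *f, struct fib_alias *fa)
{
	if (fa == &f->fn_embedded_alias)
		fa->fa_info = NULL;          /* just mark the slot free */
	else
		free(fa);                    /* kernel: call_rcu + slab free */
}

int main(void)
{
	struct fib_node n = { { NULL } };
	struct fib_alias *first = fib_fast_alloc(&n);

	first->fa_info = &n;                 /* claim the embedded slot */
	struct fib_alias *second = fib_fast_alloc(&n);

	printf("first embedded: %d, second embedded: %d\n",
	       first == &n.fn_embedded_alias, second == &n.fn_embedded_alias);
	fib_fast_free(&n, second);
	fib_fast_free(&n, first);
	return 0;
}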
410/* Caller must hold RTNL. */
370int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) 411int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
371{ 412{
372 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 413 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
@@ -451,7 +492,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
451 } 492 }
452 493
453 if (cfg->fc_nlflags & NLM_F_REPLACE) { 494 if (cfg->fc_nlflags & NLM_F_REPLACE) {
454 struct fib_info *fi_drop;
455 u8 state; 495 u8 state;
456 496
457 fa = fa_first; 497 fa = fa_first;
@@ -460,21 +500,25 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
460 err = 0; 500 err = 0;
461 goto out; 501 goto out;
462 } 502 }
463 write_lock_bh(&fib_hash_lock); 503 err = -ENOBUFS;
464 fi_drop = fa->fa_info; 504 new_fa = fib_fast_alloc(f);
465 fa->fa_info = fi; 505 if (new_fa == NULL)
466 fa->fa_type = cfg->fc_type; 506 goto out;
467 fa->fa_scope = cfg->fc_scope; 507
508 new_fa->fa_tos = fa->fa_tos;
509 new_fa->fa_info = fi;
510 new_fa->fa_type = cfg->fc_type;
511 new_fa->fa_scope = cfg->fc_scope;
468 state = fa->fa_state; 512 state = fa->fa_state;
469 fa->fa_state &= ~FA_S_ACCESSED; 513 new_fa->fa_state = state & ~FA_S_ACCESSED;
470 fib_hash_genid++; 514 fib_hash_genid++;
471 write_unlock_bh(&fib_hash_lock); 515 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
472 516
473 fib_release_info(fi_drop); 517 fn_free_alias(fa, f);
474 if (state & FA_S_ACCESSED) 518 if (state & FA_S_ACCESSED)
475 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 519 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
476 rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id, 520 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
477 &cfg->fc_nlinfo, NLM_F_REPLACE); 521 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
478 return 0; 522 return 0;
479 } 523 }
480 524
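With fib_hash_lock gone, the NLM_F_REPLACE path above can no longer rewrite fa_info/fa_type in place under a writer lock; instead it fills a fully built new_fa and swaps it in with list_replace_rcu(), so concurrent readers see either the complete old alias or the complete new one, never a half-updated mix. A minimal model of publish-by-pointer-swap using a C11 atomic pointer; the immediate free is only safe because the demo is single-threaded, where the kernel defers it with call_rcu():

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct route {
	int scope;
	int type;
};

static _Atomic(struct route *) current_route;

/* Readers see either the complete old or the complete new object. */
static void reader(void)
{
	struct route *r = atomic_load_explicit(&current_route,
					       memory_order_acquire);
	printf("scope=%d type=%d\n", r->scope, r->type);
}

/* Replace = copy, fill, publish; never write into the live object. */
static void replace(int scope, int type)
{
	struct route *old = atomic_load_explicit(&current_route,
						 memory_order_relaxed);
	struct route *new = malloc(sizeof(*new));

	if (!new)
		return;
	new->scope = scope;
	new->type = type;
	atomic_store_explicit(&current_route, new, memory_order_release);

	/* Kernel: call_rcu() here; freeing at once is only safe in this
	 * single-threaded demo. */
	free(old);
}

int main(void)
{
	struct route *first = calloc(1, sizeof(*first));

	if (!first)
		return 1;
	atomic_store(&current_route, first);
	replace(1, 2);
	reader();
	return 0;
}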
@@ -506,12 +550,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
506 f = new_f; 550 f = new_f;
507 } 551 }
508 552
509 new_fa = &f->fn_embedded_alias; 553 new_fa = fib_fast_alloc(f);
510 if (new_fa->fa_info != NULL) { 554 if (new_fa == NULL)
511 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 555 goto out;
512 if (new_fa == NULL) 556
513 goto out;
514 }
515 new_fa->fa_info = fi; 557 new_fa->fa_info = fi;
516 new_fa->fa_tos = tos; 558 new_fa->fa_tos = tos;
517 new_fa->fa_type = cfg->fc_type; 559 new_fa->fa_type = cfg->fc_type;
@@ -522,13 +564,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
522 * Insert new entry to the list. 564 * Insert new entry to the list.
523 */ 565 */
524 566
525 write_lock_bh(&fib_hash_lock);
526 if (new_f) 567 if (new_f)
527 fib_insert_node(fz, new_f); 568 fib_insert_node(fz, new_f);
528 list_add_tail(&new_fa->fa_list, 569 list_add_tail_rcu(&new_fa->fa_list,
529 (fa ? &fa->fa_list : &f->fn_alias)); 570 (fa ? &fa->fa_list : &f->fn_alias));
530 fib_hash_genid++; 571 fib_hash_genid++;
531 write_unlock_bh(&fib_hash_lock);
532 572
533 if (new_f) 573 if (new_f)
534 fz->fz_nent++; 574 fz->fz_nent++;
@@ -603,14 +643,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
603 tb->tb_id, &cfg->fc_nlinfo, 0); 643 tb->tb_id, &cfg->fc_nlinfo, 0);
604 644
605 kill_fn = 0; 645 kill_fn = 0;
606 write_lock_bh(&fib_hash_lock); 646 list_del_rcu(&fa->fa_list);
607 list_del(&fa->fa_list);
608 if (list_empty(&f->fn_alias)) { 647 if (list_empty(&f->fn_alias)) {
609 hlist_del(&f->fn_hash); 648 hlist_del_rcu(&f->fn_hash);
610 kill_fn = 1; 649 kill_fn = 1;
611 } 650 }
612 fib_hash_genid++; 651 fib_hash_genid++;
613 write_unlock_bh(&fib_hash_lock);
614 652
615 if (fa->fa_state & FA_S_ACCESSED) 653 if (fa->fa_state & FA_S_ACCESSED)
616 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 654 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
@@ -627,7 +665,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
627 665
628static int fn_flush_list(struct fn_zone *fz, int idx) 666static int fn_flush_list(struct fn_zone *fz, int idx)
629{ 667{
630 struct hlist_head *head = &fz->fz_hash[idx]; 668 struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx;
631 struct hlist_node *node, *n; 669 struct hlist_node *node, *n;
632 struct fib_node *f; 670 struct fib_node *f;
633 int found = 0; 671 int found = 0;
@@ -641,14 +679,12 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
641 struct fib_info *fi = fa->fa_info; 679 struct fib_info *fi = fa->fa_info;
642 680
643 if (fi && (fi->fib_flags&RTNH_F_DEAD)) { 681 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
644 write_lock_bh(&fib_hash_lock); 682 list_del_rcu(&fa->fa_list);
645 list_del(&fa->fa_list);
646 if (list_empty(&f->fn_alias)) { 683 if (list_empty(&f->fn_alias)) {
647 hlist_del(&f->fn_hash); 684 hlist_del_rcu(&f->fn_hash);
648 kill_f = 1; 685 kill_f = 1;
649 } 686 }
650 fib_hash_genid++; 687 fib_hash_genid++;
651 write_unlock_bh(&fib_hash_lock);
652 688
653 fn_free_alias(fa, f); 689 fn_free_alias(fa, f);
654 found++; 690 found++;
@@ -662,13 +698,16 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
662 return found; 698 return found;
663} 699}
664 700
701/* caller must hold RTNL. */
665int fib_table_flush(struct fib_table *tb) 702int fib_table_flush(struct fib_table *tb)
666{ 703{
667 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 704 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
668 struct fn_zone *fz; 705 struct fn_zone *fz;
669 int found = 0; 706 int found = 0;
670 707
671 for (fz = table->fn_zone_list; fz; fz = fz->fz_next) { 708 for (fz = rtnl_dereference(table->fn_zone_list);
709 fz != NULL;
710 fz = rtnl_dereference(fz->fz_next)) {
672 int i; 711 int i;
673 712
674 for (i = fz->fz_divisor - 1; i >= 0; i--) 713 for (i = fz->fz_divisor - 1; i >= 0; i--)
@@ -677,6 +716,24 @@ int fib_table_flush(struct fib_table *tb)
677 return found; 716 return found;
678} 717}
679 718
719void fib_free_table(struct fib_table *tb)
720{
721 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
722 struct fn_zone *fz, *next;
723
724 next = table->fn_zone_list;
725 while (next != NULL) {
726 fz = next;
727 next = fz->fz_next;
728
729 if (fz->fz_hash != fz->fz_embedded_hash)
730 fz_hash_free(fz->fz_hash, fz->fz_divisor);
731
732 kfree(fz);
733 }
734
735 kfree(tb);
736}
680 737
681static inline int 738static inline int
682fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, 739fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
@@ -690,10 +747,10 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
690 747
691 s_i = cb->args[4]; 748 s_i = cb->args[4];
692 i = 0; 749 i = 0;
693 hlist_for_each_entry(f, node, head, fn_hash) { 750 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
694 struct fib_alias *fa; 751 struct fib_alias *fa;
695 752
696 list_for_each_entry(fa, &f->fn_alias, fa_list) { 753 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
697 if (i < s_i) 754 if (i < s_i)
698 goto next; 755 goto next;
699 756
@@ -711,7 +768,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
711 cb->args[4] = i; 768 cb->args[4] = i;
712 return -1; 769 return -1;
713 } 770 }
714 next: 771next:
715 i++; 772 i++;
716 } 773 }
717 } 774 }
@@ -725,14 +782,15 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
725 struct fn_zone *fz) 782 struct fn_zone *fz)
726{ 783{
727 int h, s_h; 784 int h, s_h;
785 struct hlist_head *head = rcu_dereference(fz->fz_hash);
728 786
729 if (fz->fz_hash == NULL) 787 if (head == NULL)
730 return skb->len; 788 return skb->len;
731 s_h = cb->args[3]; 789 s_h = cb->args[3];
732 for (h = s_h; h < fz->fz_divisor; h++) { 790 for (h = s_h; h < fz->fz_divisor; h++) {
733 if (hlist_empty(&fz->fz_hash[h])) 791 if (hlist_empty(head + h))
734 continue; 792 continue;
735 if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) { 793 if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) {
736 cb->args[3] = h; 794 cb->args[3] = h;
737 return -1; 795 return -1;
738 } 796 }
@@ -746,23 +804,26 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
746int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 804int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
747 struct netlink_callback *cb) 805 struct netlink_callback *cb)
748{ 806{
749 int m, s_m; 807 int m = 0, s_m;
750 struct fn_zone *fz; 808 struct fn_zone *fz;
751 struct fn_hash *table = (struct fn_hash *)tb->tb_data; 809 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
752 810
753 s_m = cb->args[2]; 811 s_m = cb->args[2];
754 read_lock(&fib_hash_lock); 812 rcu_read_lock();
755 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { 813 for (fz = rcu_dereference(table->fn_zone_list);
756 if (m < s_m) continue; 814 fz != NULL;
815 fz = rcu_dereference(fz->fz_next), m++) {
816 if (m < s_m)
817 continue;
757 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { 818 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
758 cb->args[2] = m; 819 cb->args[2] = m;
759 read_unlock(&fib_hash_lock); 820 rcu_read_unlock();
760 return -1; 821 return -1;
761 } 822 }
762 memset(&cb->args[3], 0, 823 memset(&cb->args[3], 0,
763 sizeof(cb->args) - 3*sizeof(cb->args[0])); 824 sizeof(cb->args) - 3*sizeof(cb->args[0]));
764 } 825 }
765 read_unlock(&fib_hash_lock); 826 rcu_read_unlock();
766 cb->args[2] = m; 827 cb->args[2] = m;
767 return skb->len; 828 return skb->len;
768} 829}
@@ -825,14 +886,15 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
825 iter->genid = fib_hash_genid; 886 iter->genid = fib_hash_genid;
826 iter->valid = 1; 887 iter->valid = 1;
827 888
828 for (iter->zone = table->fn_zone_list; iter->zone; 889 for (iter->zone = rcu_dereference(table->fn_zone_list);
829 iter->zone = iter->zone->fz_next) { 890 iter->zone != NULL;
891 iter->zone = rcu_dereference(iter->zone->fz_next)) {
830 int maxslot; 892 int maxslot;
831 893
832 if (!iter->zone->fz_nent) 894 if (!iter->zone->fz_nent)
833 continue; 895 continue;
834 896
835 iter->hash_head = iter->zone->fz_hash; 897 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
836 maxslot = iter->zone->fz_divisor; 898 maxslot = iter->zone->fz_divisor;
837 899
838 for (iter->bucket = 0; iter->bucket < maxslot; 900 for (iter->bucket = 0; iter->bucket < maxslot;
@@ -911,13 +973,13 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
911 } 973 }
912 } 974 }
913 975
914 iter->zone = iter->zone->fz_next; 976 iter->zone = rcu_dereference(iter->zone->fz_next);
915 977
916 if (!iter->zone) 978 if (!iter->zone)
917 goto out; 979 goto out;
918 980
919 iter->bucket = 0; 981 iter->bucket = 0;
920 iter->hash_head = iter->zone->fz_hash; 982 iter->hash_head = rcu_dereference(iter->zone->fz_hash);
921 983
922 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { 984 hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
923 list_for_each_entry(fa, &fn->fn_alias, fa_list) { 985 list_for_each_entry(fa, &fn->fn_alias, fa_list) {
@@ -950,11 +1012,11 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
950} 1012}
951 1013
952static void *fib_seq_start(struct seq_file *seq, loff_t *pos) 1014static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
953 __acquires(fib_hash_lock) 1015 __acquires(RCU)
954{ 1016{
955 void *v = NULL; 1017 void *v = NULL;
956 1018
957 read_lock(&fib_hash_lock); 1019 rcu_read_lock();
958 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) 1020 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
959 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 1021 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
960 return v; 1022 return v;
@@ -967,15 +1029,16 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
967} 1029}
968 1030
969static void fib_seq_stop(struct seq_file *seq, void *v) 1031static void fib_seq_stop(struct seq_file *seq, void *v)
970 __releases(fib_hash_lock) 1032 __releases(RCU)
971{ 1033{
972 read_unlock(&fib_hash_lock); 1034 rcu_read_unlock();
973} 1035}
974 1036
975static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) 1037static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
976{ 1038{
977 static const unsigned type2flags[RTN_MAX + 1] = { 1039 static const unsigned type2flags[RTN_MAX + 1] = {
978 [7] = RTF_REJECT, [8] = RTF_REJECT, 1040 [7] = RTF_REJECT,
1041 [8] = RTF_REJECT,
979 }; 1042 };
980 unsigned flags = type2flags[type]; 1043 unsigned flags = type2flags[type];
981 1044
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 637b133973bd..c079cc0ec651 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -12,17 +12,22 @@ struct fib_alias {
12 u8 fa_type; 12 u8 fa_type;
13 u8 fa_scope; 13 u8 fa_scope;
14 u8 fa_state; 14 u8 fa_state;
15#ifdef CONFIG_IP_FIB_TRIE
16 struct rcu_head rcu; 15 struct rcu_head rcu;
17#endif
18}; 16};
19 17
20#define FA_S_ACCESSED 0x01 18#define FA_S_ACCESSED 0x01
21 19
20/* Dont write on fa_state unless needed, to keep it shared on all cpus */
21static inline void fib_alias_accessed(struct fib_alias *fa)
22{
23 if (!(fa->fa_state & FA_S_ACCESSED))
24 fa->fa_state |= FA_S_ACCESSED;
25}
26
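fib_alias_accessed() is a small but real scalability fix: unconditionally executing fa->fa_state |= FA_S_ACCESSED on every lookup dirties the same cache line on every CPU and makes it ping-pong between them; testing the bit first keeps every lookup after the first read-only, so the line can stay shared. A sketch of the check-before-write idiom:

#include <stdio.h>

#define FA_S_ACCESSED 0x01

struct fib_alias { unsigned char fa_state; };

/* Write the flag only on the first access so later lookups stay
 * read-only and the cache line can remain shared across CPUs. */
static void fib_alias_accessed(struct fib_alias *fa)
{
	if (!(fa->fa_state & FA_S_ACCESSED))
		fa->fa_state |= FA_S_ACCESSED;
}

int main(void)
{
	struct fib_alias fa = { 0 };

	fib_alias_accessed(&fa);   /* first call: one store */
	fib_alias_accessed(&fa);   /* later calls: load only */
	printf("state=%#x\n", fa.fa_state);
	return 0;
}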
22/* Exported by fib_semantics.c */ 27/* Exported by fib_semantics.c */
23extern int fib_semantic_match(struct list_head *head, 28extern int fib_semantic_match(struct list_head *head,
24 const struct flowi *flp, 29 const struct flowi *flp,
25 struct fib_result *res, int prefixlen); 30 struct fib_result *res, int prefixlen, int fib_flags);
26extern void fib_release_info(struct fib_info *); 31extern void fib_release_info(struct fib_info *);
27extern struct fib_info *fib_create_info(struct fib_config *cfg); 32extern struct fib_info *fib_create_info(struct fib_config *cfg);
28extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); 33extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
@@ -42,11 +47,8 @@ extern int fib_detect_death(struct fib_info *fi, int order,
42static inline void fib_result_assign(struct fib_result *res, 47static inline void fib_result_assign(struct fib_result *res,
43 struct fib_info *fi) 48 struct fib_info *fi)
44{ 49{
45 if (res->fi != NULL) 50 /* we used to play games with refcounts, but we now use RCU */
46 fib_info_put(res->fi);
47 res->fi = fi; 51 res->fi = fi;
48 if (fi != NULL)
49 atomic_inc(&fi->fib_clntref);
50} 52}
51 53
52#endif /* _FIB_LOOKUP_H */ 54#endif /* _FIB_LOOKUP_H */
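fib_result_assign() can drop the fib_clntref get/put because RCU now guarantees the fib_info outlives the read-side critical section; in-kernel callers pass FIB_LOOKUP_NOREF, and fib_semantic_match() (further down in this patch) still takes a reference for results that must survive past rcu_read_unlock(). A sketch of flag-conditional refcounting -- apart from FIB_LOOKUP_NOREF, the names here are invented for the demo:

#include <stdatomic.h>
#include <stdio.h>

#define FIB_LOOKUP_NOREF 1

struct fib_info {
	atomic_int fib_clntref;
};

static struct fib_info the_route = { 1 };

/* NOREF callers promise to finish with the result before leaving the
 * RCU read-side section, so they skip the atomic traffic entirely. */
static struct fib_info *lookup(int fib_flags)
{
	struct fib_info *fi = &the_route;

	if (!(fib_flags & FIB_LOOKUP_NOREF))
		atomic_fetch_add(&fi->fib_clntref, 1);
	return fi;
}

int main(void)
{
	struct fib_info *a = lookup(FIB_LOOKUP_NOREF);  /* refcount untouched */
	struct fib_info *b = lookup(0);                 /* long-lived user */

	printf("refs=%d (same object: %d)\n",
	       atomic_load(&b->fib_clntref), a == b);
	return 0;
}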
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 76daeb5ff564..7981a24f5c7b 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -6,7 +6,7 @@
6 * IPv4 Forwarding Information Base: policy rules. 6 * IPv4 Forwarding Information Base: policy rules.
7 * 7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 * Thomas Graf <tgraf@suug.ch> 9 * Thomas Graf <tgraf@suug.ch>
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -14,7 +14,7 @@
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 * 15 *
16 * Fixes: 16 * Fixes:
17 * Rani Assaf : local_rule cannot be deleted 17 * Rani Assaf : local_rule cannot be deleted
18 * Marc Boucher : routing by fwmark 18 * Marc Boucher : routing by fwmark
19 */ 19 */
20 20
@@ -32,8 +32,7 @@
32#include <net/ip_fib.h> 32#include <net/ip_fib.h>
33#include <net/fib_rules.h> 33#include <net/fib_rules.h>
34 34
35struct fib4_rule 35struct fib4_rule {
36{
37 struct fib_rule common; 36 struct fib_rule common;
38 u8 dst_len; 37 u8 dst_len;
39 u8 src_len; 38 u8 src_len;
@@ -58,6 +57,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
58{ 57{
59 struct fib_lookup_arg arg = { 58 struct fib_lookup_arg arg = {
60 .result = res, 59 .result = res,
60 .flags = FIB_LOOKUP_NOREF,
61 }; 61 };
62 int err; 62 int err;
63 63
@@ -91,10 +91,11 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
91 goto errout; 91 goto errout;
92 } 92 }
93 93
94 if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL) 94 tbl = fib_get_table(rule->fr_net, rule->table);
95 if (!tbl)
95 goto errout; 96 goto errout;
96 97
97 err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); 98 err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags);
98 if (err > 0) 99 if (err > 0)
99 err = -EAGAIN; 100 err = -EAGAIN;
100errout: 101errout:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 20f09c5b31e8..3e0da3ef6116 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -60,21 +60,30 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60 60
61static DEFINE_SPINLOCK(fib_multipath_lock); 61static DEFINE_SPINLOCK(fib_multipath_lock);
62 62
63#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 63#define for_nexthops(fi) { \
64for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 64 int nhsel; const struct fib_nh *nh; \
65 65 for (nhsel = 0, nh = (fi)->fib_nh; \
66#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ 66 nhsel < (fi)->fib_nhs; \
67for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) 67 nh++, nhsel++)
68
69#define change_nexthops(fi) { \
70 int nhsel; struct fib_nh *nexthop_nh; \
71 for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
72 nhsel < (fi)->fib_nhs; \
73 nexthop_nh++, nhsel++)
68 74
69#else /* CONFIG_IP_ROUTE_MULTIPATH */ 75#else /* CONFIG_IP_ROUTE_MULTIPATH */
70 76
71/* Hope, that gcc will optimize it to get rid of dummy loop */ 77/* Hope, that gcc will optimize it to get rid of dummy loop */
72 78
73#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ 79#define for_nexthops(fi) { \
74for (nhsel=0; nhsel < 1; nhsel++) 80 int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \
81 for (nhsel = 0; nhsel < 1; nhsel++)
75 82
76#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 83#define change_nexthops(fi) { \
77for (nhsel=0; nhsel < 1; nhsel++) 84 int nhsel; \
85 struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
86 for (nhsel = 0; nhsel < 1; nhsel++)
78 87
79#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 88#endif /* CONFIG_IP_ROUTE_MULTIPATH */
80 89
@@ -86,63 +95,70 @@ static const struct
86 int error; 95 int error;
87 u8 scope; 96 u8 scope;
88} fib_props[RTN_MAX + 1] = { 97} fib_props[RTN_MAX + 1] = {
89 { 98 [RTN_UNSPEC] = {
90 .error = 0, 99 .error = 0,
91 .scope = RT_SCOPE_NOWHERE, 100 .scope = RT_SCOPE_NOWHERE,
92 }, /* RTN_UNSPEC */ 101 },
93 { 102 [RTN_UNICAST] = {
94 .error = 0, 103 .error = 0,
95 .scope = RT_SCOPE_UNIVERSE, 104 .scope = RT_SCOPE_UNIVERSE,
96 }, /* RTN_UNICAST */ 105 },
97 { 106 [RTN_LOCAL] = {
98 .error = 0, 107 .error = 0,
99 .scope = RT_SCOPE_HOST, 108 .scope = RT_SCOPE_HOST,
100 }, /* RTN_LOCAL */ 109 },
101 { 110 [RTN_BROADCAST] = {
102 .error = 0, 111 .error = 0,
103 .scope = RT_SCOPE_LINK, 112 .scope = RT_SCOPE_LINK,
104 }, /* RTN_BROADCAST */ 113 },
105 { 114 [RTN_ANYCAST] = {
106 .error = 0, 115 .error = 0,
107 .scope = RT_SCOPE_LINK, 116 .scope = RT_SCOPE_LINK,
108 }, /* RTN_ANYCAST */ 117 },
109 { 118 [RTN_MULTICAST] = {
110 .error = 0, 119 .error = 0,
111 .scope = RT_SCOPE_UNIVERSE, 120 .scope = RT_SCOPE_UNIVERSE,
112 }, /* RTN_MULTICAST */ 121 },
113 { 122 [RTN_BLACKHOLE] = {
114 .error = -EINVAL, 123 .error = -EINVAL,
115 .scope = RT_SCOPE_UNIVERSE, 124 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_BLACKHOLE */ 125 },
117 { 126 [RTN_UNREACHABLE] = {
118 .error = -EHOSTUNREACH, 127 .error = -EHOSTUNREACH,
119 .scope = RT_SCOPE_UNIVERSE, 128 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_UNREACHABLE */ 129 },
121 { 130 [RTN_PROHIBIT] = {
122 .error = -EACCES, 131 .error = -EACCES,
123 .scope = RT_SCOPE_UNIVERSE, 132 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_PROHIBIT */ 133 },
125 { 134 [RTN_THROW] = {
126 .error = -EAGAIN, 135 .error = -EAGAIN,
127 .scope = RT_SCOPE_UNIVERSE, 136 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_THROW */ 137 },
129 { 138 [RTN_NAT] = {
130 .error = -EINVAL, 139 .error = -EINVAL,
131 .scope = RT_SCOPE_NOWHERE, 140 .scope = RT_SCOPE_NOWHERE,
132 }, /* RTN_NAT */ 141 },
133 { 142 [RTN_XRESOLVE] = {
134 .error = -EINVAL, 143 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE, 144 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_XRESOLVE */ 145 },
137}; 146};
138 147
139 148
140/* Release a nexthop info record */ 149/* Release a nexthop info record */
141 150
151static void free_fib_info_rcu(struct rcu_head *head)
152{
153 struct fib_info *fi = container_of(head, struct fib_info, rcu);
154
155 kfree(fi);
156}
157
142void free_fib_info(struct fib_info *fi) 158void free_fib_info(struct fib_info *fi)
143{ 159{
144 if (fi->fib_dead == 0) { 160 if (fi->fib_dead == 0) {
145 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi); 161 pr_warning("Freeing alive fib_info %p\n", fi);
146 return; 162 return;
147 } 163 }
148 change_nexthops(fi) { 164 change_nexthops(fi) {
@@ -152,7 +168,7 @@ void free_fib_info(struct fib_info *fi)
152 } endfor_nexthops(fi); 168 } endfor_nexthops(fi);
153 fib_info_cnt--; 169 fib_info_cnt--;
154 release_net(fi->fib_net); 170 release_net(fi->fib_net);
155 kfree(fi); 171 call_rcu(&fi->rcu, free_fib_info_rcu);
156} 172}
157 173
158void fib_release_info(struct fib_info *fi) 174void fib_release_info(struct fib_info *fi)
@@ -173,7 +189,7 @@ void fib_release_info(struct fib_info *fi)
173 spin_unlock_bh(&fib_info_lock); 189 spin_unlock_bh(&fib_info_lock);
174} 190}
175 191
176static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 192static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177{ 193{
178 const struct fib_nh *onh = ofi->fib_nh; 194 const struct fib_nh *onh = ofi->fib_nh;
179 195
@@ -187,7 +203,7 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
187#ifdef CONFIG_NET_CLS_ROUTE 203#ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid || 204 nh->nh_tclassid != onh->nh_tclassid ||
189#endif 205#endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
191 return -1; 207 return -1;
192 onh++; 208 onh++;
193 } endfor_nexthops(fi); 209 } endfor_nexthops(fi);
@@ -238,7 +254,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
238 nfi->fib_priority == fi->fib_priority && 254 nfi->fib_priority == fi->fib_priority &&
239 memcmp(nfi->fib_metrics, fi->fib_metrics, 255 memcmp(nfi->fib_metrics, fi->fib_metrics,
240 sizeof(fi->fib_metrics)) == 0 && 256 sizeof(fi->fib_metrics)) == 0 &&
241 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 257 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
242 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 258 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
243 return fi; 259 return fi;
244 } 260 }
@@ -247,9 +263,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
247} 263}
248 264
249/* Check, that the gateway is already configured. 265/* Check, that the gateway is already configured.
250 Used only by redirect accept routine. 266 * Used only by redirect accept routine.
251 */ 267 */
252
253int ip_fib_check_default(__be32 gw, struct net_device *dev) 268int ip_fib_check_default(__be32 gw, struct net_device *dev)
254{ 269{
255 struct hlist_head *head; 270 struct hlist_head *head;
@@ -264,7 +279,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
264 hlist_for_each_entry(nh, node, head, nh_hash) { 279 hlist_for_each_entry(nh, node, head, nh_hash) {
265 if (nh->nh_dev == dev && 280 if (nh->nh_dev == dev &&
266 nh->nh_gw == gw && 281 nh->nh_gw == gw &&
267 !(nh->nh_flags&RTNH_F_DEAD)) { 282 !(nh->nh_flags & RTNH_F_DEAD)) {
268 spin_unlock(&fib_info_lock); 283 spin_unlock(&fib_info_lock);
269 return 0; 284 return 0;
270 } 285 }
@@ -362,10 +377,10 @@ int fib_detect_death(struct fib_info *fi, int order,
362 } 377 }
363 if (state == NUD_REACHABLE) 378 if (state == NUD_REACHABLE)
364 return 0; 379 return 0;
365 if ((state&NUD_VALID) && order != dflt) 380 if ((state & NUD_VALID) && order != dflt)
366 return 0; 381 return 0;
367 if ((state&NUD_VALID) || 382 if ((state & NUD_VALID) ||
368 (*last_idx<0 && order > dflt)) { 383 (*last_idx < 0 && order > dflt)) {
369 *last_resort = fi; 384 *last_resort = fi;
370 *last_idx = order; 385 *last_idx = order;
371 } 386 }
@@ -476,75 +491,76 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
476 491
477 492
478/* 493/*
479 Picture 494 * Picture
480 ------- 495 * -------
481 496 *
482 Semantics of nexthop is very messy by historical reasons. 497 * Semantics of nexthop is very messy by historical reasons.
483 We have to take into account, that: 498 * We have to take into account, that:
484 a) gateway can be actually local interface address, 499 * a) gateway can be actually local interface address,
485 so that gatewayed route is direct. 500 * so that gatewayed route is direct.
486 b) gateway must be on-link address, possibly 501 * b) gateway must be on-link address, possibly
487 described not by an ifaddr, but also by a direct route. 502 * described not by an ifaddr, but also by a direct route.
488 c) If both gateway and interface are specified, they should not 503 * c) If both gateway and interface are specified, they should not
489 contradict. 504 * contradict.
490 d) If we use tunnel routes, gateway could be not on-link. 505 * d) If we use tunnel routes, gateway could be not on-link.
491 506 *
492 Attempt to reconcile all of these (alas, self-contradictory) conditions 507 * Attempt to reconcile all of these (alas, self-contradictory) conditions
493 results in pretty ugly and hairy code with obscure logic. 508 * results in pretty ugly and hairy code with obscure logic.
494 509 *
495 I chose to generalized it instead, so that the size 510 * I chose to generalized it instead, so that the size
496 of code does not increase practically, but it becomes 511 * of code does not increase practically, but it becomes
497 much more general. 512 * much more general.
498 Every prefix is assigned a "scope" value: "host" is local address, 513 * Every prefix is assigned a "scope" value: "host" is local address,
499 "link" is direct route, 514 * "link" is direct route,
500 [ ... "site" ... "interior" ... ] 515 * [ ... "site" ... "interior" ... ]
501 and "universe" is true gateway route with global meaning. 516 * and "universe" is true gateway route with global meaning.
502 517 *
503 Every prefix refers to a set of "nexthop"s (gw, oif), 518 * Every prefix refers to a set of "nexthop"s (gw, oif),
504 where gw must have narrower scope. This recursion stops 519 * where gw must have narrower scope. This recursion stops
505 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 520 * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
506 which means that gw is forced to be on link. 521 * which means that gw is forced to be on link.
507 522 *
508 Code is still hairy, but now it is apparently logically 523 * Code is still hairy, but now it is apparently logically
509 consistent and very flexible. F.e. as by-product it allows 524 * consistent and very flexible. F.e. as by-product it allows
510 to co-exists in peace independent exterior and interior 525 * to co-exists in peace independent exterior and interior
511 routing processes. 526 * routing processes.
512 527 *
513 Normally it looks as following. 528 * Normally it looks as following.
514 529 *
515 {universe prefix} -> (gw, oif) [scope link] 530 * {universe prefix} -> (gw, oif) [scope link]
516 | 531 * |
517 |-> {link prefix} -> (gw, oif) [scope local] 532 * |-> {link prefix} -> (gw, oif) [scope local]
518 | 533 * |
519 |-> {local prefix} (terminal node) 534 * |-> {local prefix} (terminal node)
520 */ 535 */
521
522static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 536static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
523 struct fib_nh *nh) 537 struct fib_nh *nh)
524{ 538{
525 int err; 539 int err;
526 struct net *net; 540 struct net *net;
541 struct net_device *dev;
527 542
528 net = cfg->fc_nlinfo.nl_net; 543 net = cfg->fc_nlinfo.nl_net;
529 if (nh->nh_gw) { 544 if (nh->nh_gw) {
530 struct fib_result res; 545 struct fib_result res;
531 546
532 if (nh->nh_flags&RTNH_F_ONLINK) { 547 if (nh->nh_flags & RTNH_F_ONLINK) {
533 struct net_device *dev;
534 548
535 if (cfg->fc_scope >= RT_SCOPE_LINK) 549 if (cfg->fc_scope >= RT_SCOPE_LINK)
536 return -EINVAL; 550 return -EINVAL;
537 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 551 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
538 return -EINVAL; 552 return -EINVAL;
539 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) 553 dev = __dev_get_by_index(net, nh->nh_oif);
554 if (!dev)
540 return -ENODEV; 555 return -ENODEV;
541 if (!(dev->flags&IFF_UP)) 556 if (!(dev->flags & IFF_UP))
542 return -ENETDOWN; 557 return -ENETDOWN;
543 nh->nh_dev = dev; 558 nh->nh_dev = dev;
544 dev_hold(dev); 559 dev_hold(dev);
545 nh->nh_scope = RT_SCOPE_LINK; 560 nh->nh_scope = RT_SCOPE_LINK;
546 return 0; 561 return 0;
547 } 562 }
563 rcu_read_lock();
548 { 564 {
549 struct flowi fl = { 565 struct flowi fl = {
550 .nl_u = { 566 .nl_u = {
@@ -559,50 +575,53 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
559 /* It is not necessary, but requires a bit of thinking */ 575 /* It is not necessary, but requires a bit of thinking */
560 if (fl.fl4_scope < RT_SCOPE_LINK) 576 if (fl.fl4_scope < RT_SCOPE_LINK)
561 fl.fl4_scope = RT_SCOPE_LINK; 577 fl.fl4_scope = RT_SCOPE_LINK;
562 if ((err = fib_lookup(net, &fl, &res)) != 0) 578 err = fib_lookup(net, &fl, &res);
579 if (err) {
580 rcu_read_unlock();
563 return err; 581 return err;
582 }
564 } 583 }
565 err = -EINVAL; 584 err = -EINVAL;
566 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 585 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567 goto out; 586 goto out;
568 nh->nh_scope = res.scope; 587 nh->nh_scope = res.scope;
569 nh->nh_oif = FIB_RES_OIF(res); 588 nh->nh_oif = FIB_RES_OIF(res);
570 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 589 nh->nh_dev = dev = FIB_RES_DEV(res);
590 if (!dev)
571 goto out; 591 goto out;
572 dev_hold(nh->nh_dev); 592 dev_hold(dev);
573 err = -ENETDOWN; 593 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
574 if (!(nh->nh_dev->flags & IFF_UP))
575 goto out;
576 err = 0;
577out:
578 fib_res_put(&res);
579 return err;
580 } else { 594 } else {
581 struct in_device *in_dev; 595 struct in_device *in_dev;
582 596
583 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 597 if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
584 return -EINVAL; 598 return -EINVAL;
585 599
600 rcu_read_lock();
601 err = -ENODEV;
586 in_dev = inetdev_by_index(net, nh->nh_oif); 602 in_dev = inetdev_by_index(net, nh->nh_oif);
587 if (in_dev == NULL) 603 if (in_dev == NULL)
588 return -ENODEV; 604 goto out;
589 if (!(in_dev->dev->flags&IFF_UP)) { 605 err = -ENETDOWN;
590 in_dev_put(in_dev); 606 if (!(in_dev->dev->flags & IFF_UP))
591 return -ENETDOWN; 607 goto out;
592 }
593 nh->nh_dev = in_dev->dev; 608 nh->nh_dev = in_dev->dev;
594 dev_hold(nh->nh_dev); 609 dev_hold(nh->nh_dev);
595 nh->nh_scope = RT_SCOPE_HOST; 610 nh->nh_scope = RT_SCOPE_HOST;
596 in_dev_put(in_dev); 611 err = 0;
597 } 612 }
598 return 0; 613out:
614 rcu_read_unlock();
615 return err;
599} 616}
600 617
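Most of the fib_check_nh() churn above is lifetime management: inetdev_by_index() and FIB_RES_DEV() now return pointers that are only stable under rcu_read_lock(), so the scattered early returns (some of which had to drop an in_dev reference by hand) collapse into one out: label that unlocks exactly once on every path. A generic sketch of that single-exit shape, with toy stand-ins for the kernel primitives and an invented device lookup:

#include <errno.h>
#include <stdio.h>

struct dev { int up; };

static void rcu_read_lock(void)   { }   /* toy stand-ins: the real   */
static void rcu_read_unlock(void) { }   /* primitives pin RCU state  */

static struct dev *find_dev(int ifindex)
{
	static struct dev eth0 = { .up = 1 };

	return ifindex == 3 ? &eth0 : NULL;   /* invented toy lookup */
}

static int check_nh(int ifindex)
{
	struct dev *dev;
	int err;

	rcu_read_lock();
	err = -ENODEV;
	dev = find_dev(ifindex);
	if (!dev)
		goto out;
	err = -ENETDOWN;
	if (!dev->up)
		goto out;
	err = 0;               /* success falls through to the same exit */
out:
	rcu_read_unlock();     /* exactly one unlock on every path */
	return err;
}

int main(void)
{
	printf("%d %d\n", check_nh(3), check_nh(7));
	return 0;
}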
601static inline unsigned int fib_laddr_hashfn(__be32 val) 618static inline unsigned int fib_laddr_hashfn(__be32 val)
602{ 619{
603 unsigned int mask = (fib_hash_size - 1); 620 unsigned int mask = (fib_hash_size - 1);
604 621
605 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 622 return ((__force u32)val ^
623 ((__force u32)val >> 7) ^
624 ((__force u32)val >> 14)) & mask;
606} 625}
607 626
608static struct hlist_head *fib_hash_alloc(int bytes) 627static struct hlist_head *fib_hash_alloc(int bytes)
@@ -611,7 +630,8 @@ static struct hlist_head *fib_hash_alloc(int bytes)
611 return kzalloc(bytes, GFP_KERNEL); 630 return kzalloc(bytes, GFP_KERNEL);
612 else 631 else
613 return (struct hlist_head *) 632 return (struct hlist_head *)
614 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes)); 633 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
634 get_order(bytes));
615} 635}
616 636
617static void fib_hash_free(struct hlist_head *hash, int bytes) 637static void fib_hash_free(struct hlist_head *hash, int bytes)
@@ -806,7 +826,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
806 goto failure; 826 goto failure;
807 } else { 827 } else {
808 change_nexthops(fi) { 828 change_nexthops(fi) {
809 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) 829 err = fib_check_nh(cfg, fi, nexthop_nh);
830 if (err != 0)
810 goto failure; 831 goto failure;
811 } endfor_nexthops(fi) 832 } endfor_nexthops(fi)
812 } 833 }
@@ -819,7 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
819 } 840 }
820 841
821link_it: 842link_it:
822 if ((ofi = fib_find_info(fi)) != NULL) { 843 ofi = fib_find_info(fi);
844 if (ofi) {
823 fi->fib_dead = 1; 845 fi->fib_dead = 1;
824 free_fib_info(fi); 846 free_fib_info(fi);
825 ofi->fib_treeref++; 847 ofi->fib_treeref++;
@@ -864,7 +886,7 @@ failure:
864 886
865/* Note! fib_semantic_match intentionally uses RCU list functions. */ 887/* Note! fib_semantic_match intentionally uses RCU list functions. */
866int fib_semantic_match(struct list_head *head, const struct flowi *flp, 888int fib_semantic_match(struct list_head *head, const struct flowi *flp,
867 struct fib_result *res, int prefixlen) 889 struct fib_result *res, int prefixlen, int fib_flags)
868{ 890{
869 struct fib_alias *fa; 891 struct fib_alias *fa;
870 int nh_sel = 0; 892 int nh_sel = 0;
@@ -879,7 +901,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
879 if (fa->fa_scope < flp->fl4_scope) 901 if (fa->fa_scope < flp->fl4_scope)
880 continue; 902 continue;
881 903
882 fa->fa_state |= FA_S_ACCESSED; 904 fib_alias_accessed(fa);
883 905
884 err = fib_props[fa->fa_type].error; 906 err = fib_props[fa->fa_type].error;
885 if (err == 0) { 907 if (err == 0) {
@@ -895,7 +917,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
895 case RTN_ANYCAST: 917 case RTN_ANYCAST:
896 case RTN_MULTICAST: 918 case RTN_MULTICAST:
897 for_nexthops(fi) { 919 for_nexthops(fi) {
898 if (nh->nh_flags&RTNH_F_DEAD) 920 if (nh->nh_flags & RTNH_F_DEAD)
899 continue; 921 continue;
900 if (!flp->oif || flp->oif == nh->nh_oif) 922 if (!flp->oif || flp->oif == nh->nh_oif)
901 break; 923 break;
@@ -906,16 +928,15 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
906 goto out_fill_res; 928 goto out_fill_res;
907 } 929 }
908#else 930#else
909 if (nhsel < 1) { 931 if (nhsel < 1)
910 goto out_fill_res; 932 goto out_fill_res;
911 }
912#endif 933#endif
913 endfor_nexthops(fi); 934 endfor_nexthops(fi);
914 continue; 935 continue;
915 936
916 default: 937 default:
917 printk(KERN_WARNING "fib_semantic_match bad type %#x\n", 938 pr_warning("fib_semantic_match bad type %#x\n",
918 fa->fa_type); 939 fa->fa_type);
919 return -EINVAL; 940 return -EINVAL;
920 } 941 }
921 } 942 }
@@ -929,7 +950,8 @@ out_fill_res:
929 res->type = fa->fa_type; 950 res->type = fa->fa_type;
930 res->scope = fa->fa_scope; 951 res->scope = fa->fa_scope;
931 res->fi = fa->fa_info; 952 res->fi = fa->fa_info;
932 atomic_inc(&res->fi->fib_clntref); 953 if (!(fib_flags & FIB_LOOKUP_NOREF))
954 atomic_inc(&res->fi->fib_clntref);
933 return 0; 955 return 0;
934} 956}
935 957
@@ -1028,10 +1050,10 @@ nla_put_failure:
1028} 1050}
1029 1051
1030/* 1052/*
1031 Update FIB if: 1053 * Update FIB if:
1032 - local address disappeared -> we must delete all the entries 1054 * - local address disappeared -> we must delete all the entries
1033 referring to it. 1055 * referring to it.
1034 - device went down -> we must shutdown all nexthops going via it. 1056 * - device went down -> we must shutdown all nexthops going via it.
1035 */ 1057 */
1036int fib_sync_down_addr(struct net *net, __be32 local) 1058int fib_sync_down_addr(struct net *net, __be32 local)
1037{ 1059{
@@ -1078,7 +1100,7 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1078 prev_fi = fi; 1100 prev_fi = fi;
1079 dead = 0; 1101 dead = 0;
1080 change_nexthops(fi) { 1102 change_nexthops(fi) {
1081 if (nexthop_nh->nh_flags&RTNH_F_DEAD) 1103 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1082 dead++; 1104 dead++;
1083 else if (nexthop_nh->nh_dev == dev && 1105 else if (nexthop_nh->nh_dev == dev &&
1084 nexthop_nh->nh_scope != scope) { 1106 nexthop_nh->nh_scope != scope) {
@@ -1110,10 +1132,9 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1110#ifdef CONFIG_IP_ROUTE_MULTIPATH 1132#ifdef CONFIG_IP_ROUTE_MULTIPATH
1111 1133
1112/* 1134/*
1113 Dead device goes up. We wake up dead nexthops. 1135 * Dead device goes up. We wake up dead nexthops.
1114 It takes sense only on multipath routes. 1136 * It takes sense only on multipath routes.
1115 */ 1137 */
1116
1117int fib_sync_up(struct net_device *dev) 1138int fib_sync_up(struct net_device *dev)
1118{ 1139{
1119 struct fib_info *prev_fi; 1140 struct fib_info *prev_fi;
@@ -1123,7 +1144,7 @@ int fib_sync_up(struct net_device *dev)
1123 struct fib_nh *nh; 1144 struct fib_nh *nh;
1124 int ret; 1145 int ret;
1125 1146
1126 if (!(dev->flags&IFF_UP)) 1147 if (!(dev->flags & IFF_UP))
1127 return 0; 1148 return 0;
1128 1149
1129 prev_fi = NULL; 1150 prev_fi = NULL;
@@ -1142,12 +1163,12 @@ int fib_sync_up(struct net_device *dev)
1142 prev_fi = fi; 1163 prev_fi = fi;
1143 alive = 0; 1164 alive = 0;
1144 change_nexthops(fi) { 1165 change_nexthops(fi) {
1145 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1166 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
1146 alive++; 1167 alive++;
1147 continue; 1168 continue;
1148 } 1169 }
1149 if (nexthop_nh->nh_dev == NULL || 1170 if (nexthop_nh->nh_dev == NULL ||
1150 !(nexthop_nh->nh_dev->flags&IFF_UP)) 1171 !(nexthop_nh->nh_dev->flags & IFF_UP))
1151 continue; 1172 continue;
1152 if (nexthop_nh->nh_dev != dev || 1173 if (nexthop_nh->nh_dev != dev ||
1153 !__in_dev_get_rtnl(dev)) 1174 !__in_dev_get_rtnl(dev))
@@ -1169,10 +1190,9 @@ int fib_sync_up(struct net_device *dev)
1169} 1190}
1170 1191
1171/* 1192/*
1172 The algorithm is suboptimal, but it provides really 1193 * The algorithm is suboptimal, but it provides really
1173 fair weighted route distribution. 1194 * fair weighted route distribution.
1174 */ 1195 */
1175
1176void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1196void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1177{ 1197{
1178 struct fib_info *fi = res->fi; 1198 struct fib_info *fi = res->fi;
@@ -1182,7 +1202,7 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1182 if (fi->fib_power <= 0) { 1202 if (fi->fib_power <= 0) {
1183 int power = 0; 1203 int power = 0;
1184 change_nexthops(fi) { 1204 change_nexthops(fi) {
1185 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1205 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
1186 power += nexthop_nh->nh_weight; 1206 power += nexthop_nh->nh_weight;
1187 nexthop_nh->nh_power = nexthop_nh->nh_weight; 1207 nexthop_nh->nh_power = nexthop_nh->nh_weight;
1188 } 1208 }
@@ -1198,15 +1218,16 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1198 1218
1199 1219
1200 /* w should be random number [0..fi->fib_power-1], 1220 /* w should be random number [0..fi->fib_power-1],
1201 it is pretty bad approximation. 1221 * it is pretty bad approximation.
1202 */ 1222 */
1203 1223
1204 w = jiffies % fi->fib_power; 1224 w = jiffies % fi->fib_power;
1205 1225
1206 change_nexthops(fi) { 1226 change_nexthops(fi) {
1207 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && 1227 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
1208 nexthop_nh->nh_power) { 1228 nexthop_nh->nh_power) {
1209 if ((w -= nexthop_nh->nh_power) <= 0) { 1229 w -= nexthop_nh->nh_power;
1230 if (w <= 0) {
1210 nexthop_nh->nh_power--; 1231 nexthop_nh->nh_power--;
1211 fi->fib_power--; 1232 fi->fib_power--;
1212 res->nh_sel = nhsel; 1233 res->nh_sel = nhsel;
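
The hunk above only moves the w -= nh_power assignment out of the if () condition; the selection algorithm itself is unchanged. For reference, a minimal userspace sketch of the same power-draining selection (nh[], fib_power and rand() are stand-ins for the kernel's fib_nh array, fi->fib_power and jiffies; the RTNH_F_DEAD filtering is omitted):

#include <stdio.h>
#include <stdlib.h>

struct nh { int weight; int power; };

/* Refill powers from weights once drained, then subtract nh_power
 * until w goes non-positive: over a full cycle of sum(weight) calls,
 * nexthop i is returned exactly weight[i] times. */
static int select_nexthop(struct nh *nh, int n, int *fib_power)
{
	int i, w;

	if (*fib_power <= 0) {
		*fib_power = 0;
		for (i = 0; i < n; i++) {
			nh[i].power = nh[i].weight;
			*fib_power += nh[i].weight;
		}
	}
	w = rand() % *fib_power;	/* kernel: w = jiffies % fi->fib_power */
	for (i = 0; i < n; i++) {
		if (nh[i].power) {
			w -= nh[i].power;
			if (w <= 0) {
				nh[i].power--;
				(*fib_power)--;
				return i;
			}
		}
	}
	return 0;	/* unreachable while the counters stay consistent */
}

int main(void)
{
	struct nh nh[2] = { { 3, 0 }, { 1, 0 } };
	int fib_power = 0, hits[2] = { 0, 0 }, i;

	for (i = 0; i < 4000; i++)
		hits[select_nexthop(nh, 2, &fib_power)]++;
	printf("%d %d\n", hits[0], hits[1]);	/* prints 3000 1000 */
	return 0;
}
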
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4a8e370862bc..200eb538fbb3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -16,7 +16,7 @@
16 * 16 *
17 * An experimental study of compression methods for dynamic tries 17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 19 * http://www.csc.kth.se/~snilsson/software/dyntrie2/
20 * 20 *
21 * 21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
@@ -186,9 +186,7 @@ static inline struct tnode *node_parent_rcu(struct node *node)
186{ 186{
187 struct tnode *ret = node_parent(node); 187 struct tnode *ret = node_parent(node);
188 188
189 return rcu_dereference_check(ret, 189 return rcu_dereference_rtnl(ret);
190 rcu_read_lock_held() ||
191 lockdep_rtnl_is_held());
192} 190}
193 191
194/* Same as rcu_assign_pointer 192/* Same as rcu_assign_pointer
@@ -211,9 +209,7 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
211{ 209{
212 struct node *ret = tnode_get_child(tn, i); 210 struct node *ret = tnode_get_child(tn, i);
213 211
214 return rcu_dereference_check(ret, 212 return rcu_dereference_rtnl(ret);
215 rcu_read_lock_held() ||
216 lockdep_rtnl_is_held());
217} 213}
218 214
219static inline int tnode_child_length(const struct tnode *tn) 215static inline int tnode_child_length(const struct tnode *tn)
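
Both hunks above (and the fib_find_node and trie_firstleaf hunks further down) substitute rcu_dereference_rtnl() for the open-coded check. In this era's include/linux/rtnetlink.h the helper is defined as exactly that condition, alongside the writer-side rtnl_dereference() used later in the ip_gre.c part of this diff:

#define rcu_dereference_rtnl(p)					\
	rcu_dereference_check(p, rcu_read_lock_held() ||	\
				 lockdep_rtnl_is_held())

#define rtnl_dereference(p)					\
	rcu_dereference_protected(p, lockdep_rtnl_is_held())
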
@@ -459,8 +455,8 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
459 tn->empty_children = 1<<bits; 455 tn->empty_children = 1<<bits;
460 } 456 }
461 457
462 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 458 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
463 (unsigned long) (sizeof(struct node) << bits)); 459 sizeof(struct node) << bits);
464 return tn; 460 return tn;
465} 461}
466 462
@@ -609,11 +605,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
609 605
610 /* Keep root node larger */ 606 /* Keep root node larger */
611 607
612 if (!node_parent((struct node*) tn)) { 608 if (!node_parent((struct node *)tn)) {
613 inflate_threshold_use = inflate_threshold_root; 609 inflate_threshold_use = inflate_threshold_root;
614 halve_threshold_use = halve_threshold_root; 610 halve_threshold_use = halve_threshold_root;
615 } 611 } else {
616 else {
617 inflate_threshold_use = inflate_threshold; 612 inflate_threshold_use = inflate_threshold;
618 halve_threshold_use = halve_threshold; 613 halve_threshold_use = halve_threshold;
619 } 614 }
@@ -639,7 +634,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
639 check_tnode(tn); 634 check_tnode(tn);
640 635
641 /* Return if at least one inflate is run */ 636 /* Return if at least one inflate is run */
642 if( max_work != MAX_WORK) 637 if (max_work != MAX_WORK)
643 return (struct node *) tn; 638 return (struct node *) tn;
644 639
645 /* 640 /*
@@ -966,9 +961,7 @@ fib_find_node(struct trie *t, u32 key)
966 struct node *n; 961 struct node *n;
967 962
968 pos = 0; 963 pos = 0;
969 n = rcu_dereference_check(t->trie, 964 n = rcu_dereference_rtnl(t->trie);
970 rcu_read_lock_held() ||
971 lockdep_rtnl_is_held());
972 965
973 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 966 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
974 tn = (struct tnode *) n; 967 tn = (struct tnode *) n;
@@ -1349,7 +1342,7 @@ err:
1349/* should be called with rcu_read_lock */ 1342/* should be called with rcu_read_lock */
1350static int check_leaf(struct trie *t, struct leaf *l, 1343static int check_leaf(struct trie *t, struct leaf *l,
1351 t_key key, const struct flowi *flp, 1344 t_key key, const struct flowi *flp,
1352 struct fib_result *res) 1345 struct fib_result *res, int fib_flags)
1353{ 1346{
1354 struct leaf_info *li; 1347 struct leaf_info *li;
1355 struct hlist_head *hhead = &l->list; 1348 struct hlist_head *hhead = &l->list;
@@ -1363,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l,
1363 if (l->key != (key & ntohl(mask))) 1356 if (l->key != (key & ntohl(mask)))
1364 continue; 1357 continue;
1365 1358
1366 err = fib_semantic_match(&li->falh, flp, res, plen); 1359 err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags);
1367 1360
1368#ifdef CONFIG_IP_FIB_TRIE_STATS 1361#ifdef CONFIG_IP_FIB_TRIE_STATS
1369 if (err <= 0) 1362 if (err <= 0)
@@ -1379,7 +1372,7 @@ static int check_leaf(struct trie *t, struct leaf *l,
1379} 1372}
1380 1373
1381int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, 1374int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1382 struct fib_result *res) 1375 struct fib_result *res, int fib_flags)
1383{ 1376{
1384 struct trie *t = (struct trie *) tb->tb_data; 1377 struct trie *t = (struct trie *) tb->tb_data;
1385 int ret; 1378 int ret;
@@ -1391,8 +1384,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1391 t_key cindex = 0; 1384 t_key cindex = 0;
1392 int current_prefix_length = KEYLENGTH; 1385 int current_prefix_length = KEYLENGTH;
1393 struct tnode *cn; 1386 struct tnode *cn;
1394 t_key node_prefix, key_prefix, pref_mismatch; 1387 t_key pref_mismatch;
1395 int mp;
1396 1388
1397 rcu_read_lock(); 1389 rcu_read_lock();
1398 1390
@@ -1406,7 +1398,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1406 1398
1407 /* Just a leaf? */ 1399 /* Just a leaf? */
1408 if (IS_LEAF(n)) { 1400 if (IS_LEAF(n)) {
1409 ret = check_leaf(t, (struct leaf *)n, key, flp, res); 1401 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
1410 goto found; 1402 goto found;
1411 } 1403 }
1412 1404
@@ -1431,7 +1423,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1431 } 1423 }
1432 1424
1433 if (IS_LEAF(n)) { 1425 if (IS_LEAF(n)) {
1434 ret = check_leaf(t, (struct leaf *)n, key, flp, res); 1426 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
1435 if (ret > 0) 1427 if (ret > 0)
1436 goto backtrace; 1428 goto backtrace;
1437 goto found; 1429 goto found;
@@ -1507,10 +1499,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1507 * matching prefix. 1499 * matching prefix.
1508 */ 1500 */
1509 1501
1510 node_prefix = mask_pfx(cn->key, cn->pos); 1502 pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
1511 key_prefix = mask_pfx(key, cn->pos);
1512 pref_mismatch = key_prefix^node_prefix;
1513 mp = 0;
1514 1503
1515 /* 1504 /*
1516 * In short: If skipped bits in this node do not match 1505 * In short: If skipped bits in this node do not match
@@ -1518,13 +1507,9 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1518 * state.directly. 1507 * state.directly.
1519 */ 1508 */
1520 if (pref_mismatch) { 1509 if (pref_mismatch) {
1521 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1510 int mp = KEYLENGTH - fls(pref_mismatch);
1522 mp++;
1523 pref_mismatch = pref_mismatch << 1;
1524 }
1525 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1526 1511
1527 if (key_prefix != 0) 1512 if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
1528 goto backtrace; 1513 goto backtrace;
1529 1514
1530 if (current_prefix_length >= cn->pos) 1515 if (current_prefix_length >= cn->pos)
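
The deleted loop and the new expression agree: fls(x) is the 1-based index of the most significant set bit (with fls(0) == 0), so for a nonzero pref_mismatch, KEYLENGTH - fls(pref_mismatch) is the count of leading zero bits, which is what the old shift-and-count loop computed. A small userspace check of the equivalence (fls32() is a stand-in for the kernel's fls(); both sides are only valid for nonzero input, as in the kernel path where pref_mismatch was just tested):

#include <assert.h>

#define KEYLENGTH 32

static int fls32(unsigned int x)	/* behaves like the kernel's fls() */
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static int leading_zeros_old(unsigned int pref_mismatch)
{
	int mp = 0;

	while (!(pref_mismatch & (1u << (KEYLENGTH - 1)))) {
		mp++;
		pref_mismatch <<= 1;
	}
	return mp;
}

int main(void)
{
	unsigned int x;

	for (x = 1; x; x <<= 1)		/* every single-bit pattern */
		assert(leading_zeros_old(x) == KEYLENGTH - fls32(x));
	assert(leading_zeros_old(0x00f0000f) == KEYLENGTH - fls32(0x00f0000f));
	return 0;
}
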
@@ -1748,16 +1733,14 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1748 1733
1749 /* Node empty, walk back up to parent */ 1734 /* Node empty, walk back up to parent */
1750 c = (struct node *) p; 1735 c = (struct node *) p;
1751 } while ( (p = node_parent_rcu(c)) != NULL); 1736 } while ((p = node_parent_rcu(c)) != NULL);
1752 1737
1753 return NULL; /* Root of trie */ 1738 return NULL; /* Root of trie */
1754} 1739}
1755 1740
1756static struct leaf *trie_firstleaf(struct trie *t) 1741static struct leaf *trie_firstleaf(struct trie *t)
1757{ 1742{
1758 struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, 1743 struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
1759 rcu_read_lock_held() ||
1760 lockdep_rtnl_is_held());
1761 1744
1762 if (!n) 1745 if (!n)
1763 return NULL; 1746 return NULL;
@@ -1814,6 +1797,11 @@ int fib_table_flush(struct fib_table *tb)
1814 return found; 1797 return found;
1815} 1798}
1816 1799
1800void fib_free_table(struct fib_table *tb)
1801{
1802 kfree(tb);
1803}
1804
1817void fib_table_select_default(struct fib_table *tb, 1805void fib_table_select_default(struct fib_table *tb,
1818 const struct flowi *flp, 1806 const struct flowi *flp,
1819 struct fib_result *res) 1807 struct fib_result *res)
@@ -1855,7 +1843,8 @@ void fib_table_select_default(struct fib_table *tb,
1855 if (!next_fi->fib_nh[0].nh_gw || 1843 if (!next_fi->fib_nh[0].nh_gw ||
1856 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 1844 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1857 continue; 1845 continue;
1858 fa->fa_state |= FA_S_ACCESSED; 1846
1847 fib_alias_accessed(fa);
1859 1848
1860 if (fi == NULL) { 1849 if (fi == NULL) {
1861 if (next_fi != res->fi) 1850 if (next_fi != res->fi)
@@ -2043,14 +2032,14 @@ struct fib_trie_iter {
2043 struct seq_net_private p; 2032 struct seq_net_private p;
2044 struct fib_table *tb; 2033 struct fib_table *tb;
2045 struct tnode *tnode; 2034 struct tnode *tnode;
2046 unsigned index; 2035 unsigned int index;
2047 unsigned depth; 2036 unsigned int depth;
2048}; 2037};
2049 2038
2050static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 2039static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2051{ 2040{
2052 struct tnode *tn = iter->tnode; 2041 struct tnode *tn = iter->tnode;
2053 unsigned cindex = iter->index; 2042 unsigned int cindex = iter->index;
2054 struct tnode *p; 2043 struct tnode *p;
2055 2044
2056 /* A single entry routing table */ 2045 /* A single entry routing table */
@@ -2159,7 +2148,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2159 */ 2148 */
2160static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 2149static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2161{ 2150{
2162 unsigned i, max, pointers, bytes, avdepth; 2151 unsigned int i, max, pointers, bytes, avdepth;
2163 2152
2164 if (stat->leaves) 2153 if (stat->leaves)
2165 avdepth = stat->totdepth*100 / stat->leaves; 2154 avdepth = stat->totdepth*100 / stat->leaves;
@@ -2356,7 +2345,8 @@ static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2356 2345
2357static void seq_indent(struct seq_file *seq, int n) 2346static void seq_indent(struct seq_file *seq, int n)
2358{ 2347{
2359 while (n-- > 0) seq_puts(seq, " "); 2348 while (n-- > 0)
2349 seq_puts(seq, " ");
2360} 2350}
2361 2351
2362static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2352static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
@@ -2388,7 +2378,7 @@ static const char *const rtn_type_names[__RTN_MAX] = {
2388 [RTN_XRESOLVE] = "XRESOLVE", 2378 [RTN_XRESOLVE] = "XRESOLVE",
2389}; 2379};
2390 2380
2391static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2381static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2392{ 2382{
2393 if (t < __RTN_MAX && rtn_type_names[t]) 2383 if (t < __RTN_MAX && rtn_type_names[t])
2394 return rtn_type_names[t]; 2384 return rtn_type_names[t];
@@ -2544,13 +2534,12 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
2544 rcu_read_unlock(); 2534 rcu_read_unlock();
2545} 2535}
2546 2536
2547static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2537static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2548{ 2538{
2549 static unsigned type2flags[RTN_MAX + 1] = { 2539 unsigned int flags = 0;
2550 [7] = RTF_REJECT, [8] = RTF_REJECT,
2551 };
2552 unsigned flags = type2flags[type];
2553 2540
2541 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2542 flags = RTF_REJECT;
2554 if (fi && fi->fib_nh->nh_gw) 2543 if (fi && fi->fib_nh->nh_gw)
2555 flags |= RTF_GATEWAY; 2544 flags |= RTF_GATEWAY;
2556 if (mask == htonl(0xFFFFFFFF)) 2545 if (mask == htonl(0xFFFFFFFF))
@@ -2562,7 +2551,7 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2562/* 2551/*
2563 * This outputs /proc/net/route. 2552 * This outputs /proc/net/route.
2564 * The format of the file is not supposed to be changed 2553 * The format of the file is not supposed to be changed
2565 * and needs to be same as fib_hash output to avoid breaking 2554 * and needs to be same as fib_hash output to avoid breaking
2566 * legacy utilities 2555 * legacy utilities
2567 */ 2556 */
2568static int fib_route_seq_show(struct seq_file *seq, void *v) 2557static int fib_route_seq_show(struct seq_file *seq, void *v)
@@ -2587,7 +2576,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2587 2576
2588 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2577 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2589 const struct fib_info *fi = fa->fa_info; 2578 const struct fib_info *fi = fa->fa_info;
2590 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2579 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2591 int len; 2580 int len;
2592 2581
2593 if (fa->fa_type == RTN_BROADCAST 2582 if (fa->fa_type == RTN_BROADCAST
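
The fib_flag_trans() rewrite above trades a magic-number lookup table for named route types; the old indices 7 and 8 come straight from the rtnetlink route-type enum, excerpted here from include/linux/rtnetlink.h:

enum {
	RTN_UNSPEC,		/* 0 */
	RTN_UNICAST,		/* 1: gateway or direct route */
	RTN_LOCAL,		/* 2: accept locally */
	RTN_BROADCAST,		/* 3 */
	RTN_ANYCAST,		/* 4 */
	RTN_MULTICAST,		/* 5 */
	RTN_BLACKHOLE,		/* 6: drop */
	RTN_UNREACHABLE,	/* 7: the old type2flags[7] = RTF_REJECT */
	RTN_PROHIBIT,		/* 8: the old type2flags[8] = RTF_REJECT */
	RTN_THROW,		/* 9 */
	RTN_NAT,		/* 10 */
	RTN_XRESOLVE,		/* 11 */
	__RTN_MAX
};
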
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
new file mode 100644
index 000000000000..c6933f2ea310
--- /dev/null
+++ b/net/ipv4/gre.c
@@ -0,0 +1,152 @@
1/*
2 * GRE over IPv4 demultiplexer driver
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/kmod.h>
16#include <linux/skbuff.h>
17#include <linux/in.h>
18#include <linux/netdevice.h>
19#include <linux/version.h>
20#include <linux/spinlock.h>
21#include <net/protocol.h>
22#include <net/gre.h>
23
24
25static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
26static DEFINE_SPINLOCK(gre_proto_lock);
27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
29{
30 if (version >= GREPROTO_MAX)
31 goto err_out;
32
33 spin_lock(&gre_proto_lock);
34 if (gre_proto[version])
35 goto err_out_unlock;
36
37 rcu_assign_pointer(gre_proto[version], proto);
38 spin_unlock(&gre_proto_lock);
39 return 0;
40
41err_out_unlock:
42 spin_unlock(&gre_proto_lock);
43err_out:
44 return -1;
45}
46EXPORT_SYMBOL_GPL(gre_add_protocol);
47
48int gre_del_protocol(const struct gre_protocol *proto, u8 version)
49{
50 if (version >= GREPROTO_MAX)
51 goto err_out;
52
53 spin_lock(&gre_proto_lock);
54 if (rcu_dereference_protected(gre_proto[version],
55 lockdep_is_held(&gre_proto_lock)) != proto)
56 goto err_out_unlock;
57 rcu_assign_pointer(gre_proto[version], NULL);
58 spin_unlock(&gre_proto_lock);
59 synchronize_rcu();
60 return 0;
61
62err_out_unlock:
63 spin_unlock(&gre_proto_lock);
64err_out:
65 return -1;
66}
67EXPORT_SYMBOL_GPL(gre_del_protocol);
68
69static int gre_rcv(struct sk_buff *skb)
70{
71 const struct gre_protocol *proto;
72 u8 ver;
73 int ret;
74
75 if (!pskb_may_pull(skb, 12))
76 goto drop;
77
78 ver = skb->data[1]&0x7f;
79 if (ver >= GREPROTO_MAX)
80 goto drop;
81
82 rcu_read_lock();
83 proto = rcu_dereference(gre_proto[ver]);
84 if (!proto || !proto->handler)
85 goto drop_unlock;
86 ret = proto->handler(skb);
87 rcu_read_unlock();
88 return ret;
89
90drop_unlock:
91 rcu_read_unlock();
92drop:
93 kfree_skb(skb);
94 return NET_RX_DROP;
95}
96
97static void gre_err(struct sk_buff *skb, u32 info)
98{
99 const struct gre_protocol *proto;
100 u8 ver;
101
102 if (!pskb_may_pull(skb, 12))
103 goto drop;
104
105 ver = skb->data[1]&0x7f;
106 if (ver >= GREPROTO_MAX)
107 goto drop;
108
109 rcu_read_lock();
110 proto = rcu_dereference(gre_proto[ver]);
111 if (!proto || !proto->err_handler)
112 goto drop_unlock;
113 proto->err_handler(skb, info);
114 rcu_read_unlock();
115 return;
116
117drop_unlock:
118 rcu_read_unlock();
119drop:
120 kfree_skb(skb);
121}
122
123static const struct net_protocol net_gre_protocol = {
124 .handler = gre_rcv,
125 .err_handler = gre_err,
126 .netns_ok = 1,
127};
128
129static int __init gre_init(void)
130{
131 pr_info("GRE over IPv4 demultiplexer driver\n");
132
133 if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
134 pr_err("gre: can't add protocol\n");
135 return -EAGAIN;
136 }
137
138 return 0;
139}
140
141static void __exit gre_exit(void)
142{
143 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
144}
145
146module_init(gre_init);
147module_exit(gre_exit);
148
149MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
150MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
151MODULE_LICENSE("GPL");
152
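
The intended consumer of this demultiplexer is the PPTP/version-1 GRE work it was split out for; net/gre.h defines GREPROTO_CISCO (0), GREPROTO_PPTP (1) and GREPROTO_MAX. A hedged sketch of how a module would hook in (my_gre_rcv, my_gre_err and the error-code choice are illustrative, not from this diff; only the registration calls are the API added above):

static int my_gre_rcv(struct sk_buff *skb)
{
	/* parse the version-1 (enhanced GRE) header here ... */
	kfree_skb(skb);
	return NET_RX_DROP;
}

static void my_gre_err(struct sk_buff *skb, u32 info)
{
	/* relay ICMP errors to the owning socket here ... */
}

static const struct gre_protocol my_gre_proto = {
	.handler	= my_gre_rcv,
	.err_handler	= my_gre_err,
};

static int __init my_init(void)
{
	if (gre_add_protocol(&my_gre_proto, GREPROTO_PPTP) != 0)
		return -EBUSY;	/* slot already claimed */
	return 0;
}

static void __exit my_exit(void)
{
	gre_del_protocol(&my_gre_proto, GREPROTO_PPTP);
}

module_init(my_init);
module_exit(my_exit);
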
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a0d847c7cba5..96bc7f9475a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
379 inet->tos = ip_hdr(skb)->tos; 379 inet->tos = ip_hdr(skb)->tos;
380 daddr = ipc.addr = rt->rt_src; 380 daddr = ipc.addr = rt->rt_src;
381 ipc.opt = NULL; 381 ipc.opt = NULL;
382 ipc.shtx.flags = 0; 382 ipc.tx_flags = 0;
383 if (icmp_param->replyopts.optlen) { 383 if (icmp_param->replyopts.optlen) {
384 ipc.opt = &icmp_param->replyopts; 384 ipc.opt = &icmp_param->replyopts;
385 if (ipc.opt->srr) 385 if (ipc.opt->srr)
@@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
538 inet_sk(sk)->tos = tos; 538 inet_sk(sk)->tos = tos;
539 ipc.addr = iph->saddr; 539 ipc.addr = iph->saddr;
540 ipc.opt = &icmp_param.replyopts; 540 ipc.opt = &icmp_param.replyopts;
541 ipc.shtx.flags = 0; 541 ipc.tx_flags = 0;
542 542
543 { 543 {
544 struct flowi fl = { 544 struct flowi fl = {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2a4bb76f2132..3c53c2d89e3b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1269,14 +1269,14 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
1269 if (im->multiaddr == IGMP_ALL_HOSTS) 1269 if (im->multiaddr == IGMP_ALL_HOSTS)
1270 return; 1270 return;
1271 1271
1272 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { 1272 /* a failover is happening and switches
1273 igmp_mod_timer(im, IGMP_Initial_Report_Delay); 1273 * must be notified immediately */
1274 return; 1274 if (IGMP_V1_SEEN(in_dev))
1275 } 1275 igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
1276 /* else, v3 */ 1276 else if (IGMP_V2_SEEN(in_dev))
1277 im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : 1277 igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
1278 IGMP_Unsolicited_Report_Count; 1278 else
1279 igmp_ifc_event(in_dev); 1279 igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
1280#endif 1280#endif
1281} 1281}
1282EXPORT_SYMBOL(ip_mc_rejoin_group); 1282EXPORT_SYMBOL(ip_mc_rejoin_group);
@@ -1418,6 +1418,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1418 write_unlock_bh(&in_dev->mc_list_lock); 1418 write_unlock_bh(&in_dev->mc_list_lock);
1419} 1419}
1420 1420
1421/* RTNL is locked */
1421static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) 1422static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1422{ 1423{
1423 struct flowi fl = { .nl_u = { .ip4_u = 1424 struct flowi fl = { .nl_u = { .ip4_u =
@@ -1428,15 +1429,12 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1428 1429
1429 if (imr->imr_ifindex) { 1430 if (imr->imr_ifindex) {
1430 idev = inetdev_by_index(net, imr->imr_ifindex); 1431 idev = inetdev_by_index(net, imr->imr_ifindex);
1431 if (idev)
1432 __in_dev_put(idev);
1433 return idev; 1432 return idev;
1434 } 1433 }
1435 if (imr->imr_address.s_addr) { 1434 if (imr->imr_address.s_addr) {
1436 dev = ip_dev_find(net, imr->imr_address.s_addr); 1435 dev = __ip_dev_find(net, imr->imr_address.s_addr, false);
1437 if (!dev) 1436 if (!dev)
1438 return NULL; 1437 return NULL;
1439 dev_put(dev);
1440 } 1438 }
1441 1439
1442 if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1440 if (!dev && !ip_route_output_key(net, &rt, &fl)) {
@@ -2308,10 +2306,8 @@ void ip_mc_drop_socket(struct sock *sk)
2308 2306
2309 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); 2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
2310 (void) ip_mc_leave_src(sk, iml, in_dev); 2308 (void) ip_mc_leave_src(sk, iml, in_dev);
2311 if (in_dev != NULL) { 2309 if (in_dev != NULL)
2312 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); 2310 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
2313 in_dev_put(in_dev);
2314 }
2315 /* decrease mem now to avoid the memleak warning */ 2311 /* decrease mem now to avoid the memleak warning */
2316 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); 2312 atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
2317 call_rcu(&iml->rcu, ip_mc_socklist_reclaim); 2313 call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e5fa2ddce320..2ada17129fce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -425,7 +425,7 @@ static int inet_diag_bc_run(const void *bc, int len,
425 bc += op->no; 425 bc += op->no;
426 } 426 }
427 } 427 }
428 return (len == 0); 428 return len == 0;
429} 429}
430 430
431static int valid_cc(const void *bc, int len, int cc) 431static int valid_cc(const void *bc, int len, int cc)
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
490{ 490{
491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 491 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
492 492
493 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 493 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
494 struct inet_diag_entry entry; 494 struct inet_diag_entry entry;
495 struct rtattr *bc = (struct rtattr *)(r + 1); 495 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
496 sizeof(*r),
497 INET_DIAG_REQ_BYTECODE);
496 struct inet_sock *inet = inet_sk(sk); 498 struct inet_sock *inet = inet_sk(sk);
497 499
498 entry.family = sk->sk_family; 500 entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
512 entry.dport = ntohs(inet->inet_dport); 514 entry.dport = ntohs(inet->inet_dport);
513 entry.userlocks = sk->sk_userlocks; 515 entry.userlocks = sk->sk_userlocks;
514 516
515 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 517 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
516 return 0; 518 return 0;
517 } 519 }
518 520
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
527{ 529{
528 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 530 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
529 531
530 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 532 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
531 struct inet_diag_entry entry; 533 struct inet_diag_entry entry;
532 struct rtattr *bc = (struct rtattr *)(r + 1); 534 const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
535 sizeof(*r),
536 INET_DIAG_REQ_BYTECODE);
533 537
534 entry.family = tw->tw_family; 538 entry.family = tw->tw_family;
535#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) 539#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
548 entry.dport = ntohs(tw->tw_dport); 552 entry.dport = ntohs(tw->tw_dport);
549 entry.userlocks = 0; 553 entry.userlocks = 0;
550 554
551 if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) 555 if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
552 return 0; 556 return 0;
553 } 557 }
554 558
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
618 struct inet_diag_req *r = NLMSG_DATA(cb->nlh); 622 struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
619 struct inet_connection_sock *icsk = inet_csk(sk); 623 struct inet_connection_sock *icsk = inet_csk(sk);
620 struct listen_sock *lopt; 624 struct listen_sock *lopt;
621 struct rtattr *bc = NULL; 625 const struct nlattr *bc = NULL;
622 struct inet_sock *inet = inet_sk(sk); 626 struct inet_sock *inet = inet_sk(sk);
623 int j, s_j; 627 int j, s_j;
624 int reqnum, s_reqnum; 628 int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
638 if (!lopt || !lopt->qlen) 642 if (!lopt || !lopt->qlen)
639 goto out; 643 goto out;
640 644
641 if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { 645 if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
642 bc = (struct rtattr *)(r + 1); 646 bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
647 INET_DIAG_REQ_BYTECODE);
643 entry.sport = inet->inet_num; 648 entry.sport = inet->inet_num;
644 entry.userlocks = sk->sk_userlocks; 649 entry.userlocks = sk->sk_userlocks;
645 } 650 }
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
672 &ireq->rmt_addr; 677 &ireq->rmt_addr;
673 entry.dport = ntohs(ireq->rmt_port); 678 entry.dport = ntohs(ireq->rmt_port);
674 679
675 if (!inet_diag_bc_run(RTA_DATA(bc), 680 if (!inet_diag_bc_run(nla_data(bc),
676 RTA_PAYLOAD(bc), &entry)) 681 nla_len(bc), &entry))
677 continue; 682 continue;
678 } 683 }
679 684
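
The inet_diag conversion above swaps raw rtattr pointer arithmetic, and the fragile nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r)) length heuristic, for the generic netlink attribute helpers: nlmsg_attrlen(nlh, hdrlen) reports how many bytes of attributes follow the family header, and nlmsg_find_attr(nlh, hdrlen, type) does a bounds-checked search by attribute type, returning NULL when the attribute is absent. Condensed, the resulting pattern looks like this (entry, inet_diag_bc_run and INET_DIAG_REQ_BYTECODE are from the surrounding file):

	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
	const struct nlattr *bc;

	if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
		bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
				     INET_DIAG_REQ_BYTECODE);
		if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
			return 0;	/* socket filtered out of the dump */
	}
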
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fb7ad5a21ff3..1b344f30b463 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk)
101} 101}
102EXPORT_SYMBOL(inet_put_port); 102EXPORT_SYMBOL(inet_put_port);
103 103
104void __inet_inherit_port(struct sock *sk, struct sock *child) 104int __inet_inherit_port(struct sock *sk, struct sock *child)
105{ 105{
106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
107 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, 107 unsigned short port = inet_sk(child)->inet_num;
108 const int bhash = inet_bhashfn(sock_net(sk), port,
108 table->bhash_size); 109 table->bhash_size);
109 struct inet_bind_hashbucket *head = &table->bhash[bhash]; 110 struct inet_bind_hashbucket *head = &table->bhash[bhash];
110 struct inet_bind_bucket *tb; 111 struct inet_bind_bucket *tb;
111 112
112 spin_lock(&head->lock); 113 spin_lock(&head->lock);
113 tb = inet_csk(sk)->icsk_bind_hash; 114 tb = inet_csk(sk)->icsk_bind_hash;
115 if (tb->port != port) {
116 /* NOTE: using tproxy and redirecting skbs to a proxy
117 * on a different listener port breaks the assumption
118 * that the listener socket's icsk_bind_hash is the same
119 * as that of the child socket. We have to look up or
120 * create a new bind bucket for the child here. */
121 struct hlist_node *node;
122 inet_bind_bucket_for_each(tb, node, &head->chain) {
123 if (net_eq(ib_net(tb), sock_net(sk)) &&
124 tb->port == port)
125 break;
126 }
127 if (!node) {
128 tb = inet_bind_bucket_create(table->bind_bucket_cachep,
129 sock_net(sk), head, port);
130 if (!tb) {
131 spin_unlock(&head->lock);
132 return -ENOMEM;
133 }
134 }
135 }
114 sk_add_bind_node(child, &tb->owners); 136 sk_add_bind_node(child, &tb->owners);
115 inet_csk(child)->icsk_bind_hash = tb; 137 inet_csk(child)->icsk_bind_hash = tb;
116 spin_unlock(&head->lock); 138 spin_unlock(&head->lock);
139
140 return 0;
117} 141}
118EXPORT_SYMBOL_GPL(__inet_inherit_port); 142EXPORT_SYMBOL_GPL(__inet_inherit_port);
119 143
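
Because __inet_inherit_port() can now fail with -ENOMEM, its callers (the TCP and DCCP request-socket paths, changed elsewhere in this series) have to unwind. A sketch of the caller-side pattern in the syn-recv path, not part of this hunk:

	if (__inet_inherit_port(sk, newsk) < 0) {
		sock_put(newsk);
		goto exit;	/* drop the freshly created child socket */
	}
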
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
72#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
73 73
74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) 74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
75#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
75static const struct inet_peer peer_fake_node = { 76static const struct inet_peer peer_fake_node = {
76 .avl_left = peer_avl_empty, 77 .avl_left = peer_avl_empty_rcu,
77 .avl_right = peer_avl_empty, 78 .avl_right = peer_avl_empty_rcu,
78 .avl_height = 0 79 .avl_height = 0
79}; 80};
80 81
81static struct { 82static struct {
82 struct inet_peer *root; 83 struct inet_peer __rcu *root;
83 spinlock_t lock; 84 spinlock_t lock;
84 int total; 85 int total;
85} peers = { 86} peers = {
86 .root = peer_avl_empty, 87 .root = peer_avl_empty_rcu,
87 .lock = __SPIN_LOCK_UNLOCKED(peers.lock), 88 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 .total = 0, 89 .total = 0,
89}; 90};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
156 */ 157 */
157#define lookup(_daddr, _stack) \ 158#define lookup(_daddr, _stack) \
158({ \ 159({ \
159 struct inet_peer *u, **v; \ 160 struct inet_peer *u; \
161 struct inet_peer __rcu **v; \
160 \ 162 \
161 stackptr = _stack; \ 163 stackptr = _stack; \
162 *stackptr++ = &peers.root; \ 164 *stackptr++ = &peers.root; \
163 for (u = peers.root; u != peer_avl_empty; ) { \ 165 for (u = rcu_dereference_protected(peers.root, \
166 lockdep_is_held(&peers.lock)); \
167 u != peer_avl_empty; ) { \
164 if (_daddr == u->v4daddr) \ 168 if (_daddr == u->v4daddr) \
165 break; \ 169 break; \
166 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 170 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
168 else \ 172 else \
169 v = &u->avl_right; \ 173 v = &u->avl_right; \
170 *stackptr++ = v; \ 174 *stackptr++ = v; \
171 u = *v; \ 175 u = rcu_dereference_protected(*v, \
176 lockdep_is_held(&peers.lock)); \
172 } \ 177 } \
173 u; \ 178 u; \
174}) 179})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
209/* Called with local BH disabled and the pool lock held. */ 214/* Called with local BH disabled and the pool lock held. */
210#define lookup_rightempty(start) \ 215#define lookup_rightempty(start) \
211({ \ 216({ \
212 struct inet_peer *u, **v; \ 217 struct inet_peer *u; \
218 struct inet_peer __rcu **v; \
213 *stackptr++ = &start->avl_left; \ 219 *stackptr++ = &start->avl_left; \
214 v = &start->avl_left; \ 220 v = &start->avl_left; \
215 for (u = *v; u->avl_right != peer_avl_empty; ) { \ 221 for (u = rcu_dereference_protected(*v, \
222 lockdep_is_held(&peers.lock)); \
223 u->avl_right != peer_avl_empty_rcu; ) { \
216 v = &u->avl_right; \ 224 v = &u->avl_right; \
217 *stackptr++ = v; \ 225 *stackptr++ = v; \
218 u = *v; \ 226 u = rcu_dereference_protected(*v, \
227 lockdep_is_held(&peers.lock)); \
219 } \ 228 } \
220 u; \ 229 u; \
221}) 230})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
224 * Variable names are the proof of operation correctness. 233 * Variable names are the proof of operation correctness.
225 * Look into mm/map_avl.c for more detail description of the ideas. 234 * Look into mm/map_avl.c for more detail description of the ideas.
226 */ 235 */
227static void peer_avl_rebalance(struct inet_peer **stack[], 236static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
228 struct inet_peer ***stackend) 237 struct inet_peer __rcu ***stackend)
229{ 238{
230 struct inet_peer **nodep, *node, *l, *r; 239 struct inet_peer __rcu **nodep;
240 struct inet_peer *node, *l, *r;
231 int lh, rh; 241 int lh, rh;
232 242
233 while (stackend > stack) { 243 while (stackend > stack) {
234 nodep = *--stackend; 244 nodep = *--stackend;
235 node = *nodep; 245 node = rcu_dereference_protected(*nodep,
236 l = node->avl_left; 246 lockdep_is_held(&peers.lock));
237 r = node->avl_right; 247 l = rcu_dereference_protected(node->avl_left,
248 lockdep_is_held(&peers.lock));
249 r = rcu_dereference_protected(node->avl_right,
250 lockdep_is_held(&peers.lock));
238 lh = node_height(l); 251 lh = node_height(l);
239 rh = node_height(r); 252 rh = node_height(r);
240 if (lh > rh + 1) { /* l: RH+2 */ 253 if (lh > rh + 1) { /* l: RH+2 */
241 struct inet_peer *ll, *lr, *lrl, *lrr; 254 struct inet_peer *ll, *lr, *lrl, *lrr;
242 int lrh; 255 int lrh;
243 ll = l->avl_left; 256 ll = rcu_dereference_protected(l->avl_left,
244 lr = l->avl_right; 257 lockdep_is_held(&peers.lock));
258 lr = rcu_dereference_protected(l->avl_right,
259 lockdep_is_held(&peers.lock));
245 lrh = node_height(lr); 260 lrh = node_height(lr);
246 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 261 if (lrh <= node_height(ll)) { /* ll: RH+1 */
247 node->avl_left = lr; /* lr: RH or RH+1 */ 262 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
248 node->avl_right = r; /* r: RH */ 263 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
249 node->avl_height = lrh + 1; /* RH+1 or RH+2 */ 264 node->avl_height = lrh + 1; /* RH+1 or RH+2 */
250 l->avl_left = ll; /* ll: RH+1 */ 265 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
251 l->avl_right = node; /* node: RH+1 or RH+2 */ 266 RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
252 l->avl_height = node->avl_height + 1; 267 l->avl_height = node->avl_height + 1;
253 *nodep = l; 268 RCU_INIT_POINTER(*nodep, l);
254 } else { /* ll: RH, lr: RH+1 */ 269 } else { /* ll: RH, lr: RH+1 */
255 lrl = lr->avl_left; /* lrl: RH or RH-1 */ 270 lrl = rcu_dereference_protected(lr->avl_left,
256 lrr = lr->avl_right; /* lrr: RH or RH-1 */ 271 lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
257 node->avl_left = lrr; /* lrr: RH or RH-1 */ 272 lrr = rcu_dereference_protected(lr->avl_right,
258 node->avl_right = r; /* r: RH */ 273 lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
274 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
275 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
259 node->avl_height = rh + 1; /* node: RH+1 */ 276 node->avl_height = rh + 1; /* node: RH+1 */
260 l->avl_left = ll; /* ll: RH */ 277 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
261 l->avl_right = lrl; /* lrl: RH or RH-1 */ 278 RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
262 l->avl_height = rh + 1; /* l: RH+1 */ 279 l->avl_height = rh + 1; /* l: RH+1 */
263 lr->avl_left = l; /* l: RH+1 */ 280 RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
264 lr->avl_right = node; /* node: RH+1 */ 281 RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
265 lr->avl_height = rh + 2; 282 lr->avl_height = rh + 2;
266 *nodep = lr; 283 RCU_INIT_POINTER(*nodep, lr);
267 } 284 }
268 } else if (rh > lh + 1) { /* r: LH+2 */ 285 } else if (rh > lh + 1) { /* r: LH+2 */
269 struct inet_peer *rr, *rl, *rlr, *rll; 286 struct inet_peer *rr, *rl, *rlr, *rll;
270 int rlh; 287 int rlh;
271 rr = r->avl_right; 288 rr = rcu_dereference_protected(r->avl_right,
272 rl = r->avl_left; 289 lockdep_is_held(&peers.lock));
290 rl = rcu_dereference_protected(r->avl_left,
291 lockdep_is_held(&peers.lock));
273 rlh = node_height(rl); 292 rlh = node_height(rl);
274 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 293 if (rlh <= node_height(rr)) { /* rr: LH+1 */
275 node->avl_right = rl; /* rl: LH or LH+1 */ 294 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
276 node->avl_left = l; /* l: LH */ 295 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
277 node->avl_height = rlh + 1; /* LH+1 or LH+2 */ 296 node->avl_height = rlh + 1; /* LH+1 or LH+2 */
278 r->avl_right = rr; /* rr: LH+1 */ 297 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
279 r->avl_left = node; /* node: LH+1 or LH+2 */ 298 RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
280 r->avl_height = node->avl_height + 1; 299 r->avl_height = node->avl_height + 1;
281 *nodep = r; 300 RCU_INIT_POINTER(*nodep, r);
282 } else { /* rr: RH, rl: RH+1 */ 301 } else { /* rr: RH, rl: RH+1 */
283 rlr = rl->avl_right; /* rlr: LH or LH-1 */ 302 rlr = rcu_dereference_protected(rl->avl_right,
284 rll = rl->avl_left; /* rll: LH or LH-1 */ 303 lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
285 node->avl_right = rll; /* rll: LH or LH-1 */ 304 rll = rcu_dereference_protected(rl->avl_left,
286 node->avl_left = l; /* l: LH */ 305 lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
306 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
307 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
287 node->avl_height = lh + 1; /* node: LH+1 */ 308 node->avl_height = lh + 1; /* node: LH+1 */
288 r->avl_right = rr; /* rr: LH */ 309 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
289 r->avl_left = rlr; /* rlr: LH or LH-1 */ 310 RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
290 r->avl_height = lh + 1; /* r: LH+1 */ 311 r->avl_height = lh + 1; /* r: LH+1 */
291 rl->avl_right = r; /* r: LH+1 */ 312 RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
292 rl->avl_left = node; /* node: LH+1 */ 313 RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
293 rl->avl_height = lh + 2; 314 rl->avl_height = lh + 2;
294 *nodep = rl; 315 RCU_INIT_POINTER(*nodep, rl);
295 } 316 }
296 } else { 317 } else {
297 node->avl_height = (lh > rh ? lh : rh) + 1; 318 node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
303#define link_to_pool(n) \ 324#define link_to_pool(n) \
304do { \ 325do { \
305 n->avl_height = 1; \ 326 n->avl_height = 1; \
306 n->avl_left = peer_avl_empty; \ 327 n->avl_left = peer_avl_empty_rcu; \
307 n->avl_right = peer_avl_empty; \ 328 n->avl_right = peer_avl_empty_rcu; \
308 smp_wmb(); /* lockless readers can catch us now */ \ 329 /* lockless readers can catch us now */ \
309 **--stackptr = n; \ 330 rcu_assign_pointer(**--stackptr, n); \
310 peer_avl_rebalance(stack, stackptr); \ 331 peer_avl_rebalance(stack, stackptr); \
311} while (0) 332} while (0)
312 333
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
330 * We use refcnt=-1 to alert lockless readers this entry is deleted. 351 * We use refcnt=-1 to alert lockless readers this entry is deleted.
331 */ 352 */
332 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 353 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
333 struct inet_peer **stack[PEER_MAXDEPTH]; 354 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
334 struct inet_peer ***stackptr, ***delp; 355 struct inet_peer __rcu ***stackptr, ***delp;
335 if (lookup(p->v4daddr, stack) != p) 356 if (lookup(p->v4daddr, stack) != p)
336 BUG(); 357 BUG();
337 delp = stackptr - 1; /* *delp[0] == p */ 358 delp = stackptr - 1; /* *delp[0] == p */
338 if (p->avl_left == peer_avl_empty) { 359 if (p->avl_left == peer_avl_empty_rcu) {
339 *delp[0] = p->avl_right; 360 *delp[0] = p->avl_right;
340 --stackptr; 361 --stackptr;
341 } else { 362 } else {
342 /* look for a node to insert instead of p */ 363 /* look for a node to insert instead of p */
343 struct inet_peer *t; 364 struct inet_peer *t;
344 t = lookup_rightempty(p); 365 t = lookup_rightempty(p);
345 BUG_ON(*stackptr[-1] != t); 366 BUG_ON(rcu_dereference_protected(*stackptr[-1],
367 lockdep_is_held(&peers.lock)) != t);
346 **--stackptr = t->avl_left; 368 **--stackptr = t->avl_left;
347 /* t is removed, t->v4daddr > x->v4daddr for any 369 /* t is removed, t->v4daddr > x->v4daddr for any
348 * x in p->avl_left subtree. 370 * x in p->avl_left subtree.
349 * Put t in the old place of p. */ 371 * Put t in the old place of p. */
350 *delp[0] = t; 372 RCU_INIT_POINTER(*delp[0], t);
351 t->avl_left = p->avl_left; 373 t->avl_left = p->avl_left;
352 t->avl_right = p->avl_right; 374 t->avl_right = p->avl_right;
353 t->avl_height = p->avl_height; 375 t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
414struct inet_peer *inet_getpeer(__be32 daddr, int create) 436struct inet_peer *inet_getpeer(__be32 daddr, int create)
415{ 437{
416 struct inet_peer *p; 438 struct inet_peer *p;
417 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; 439 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
418 440
419 /* Look up for the address quickly, lockless. 441 /* Look up for the address quickly, lockless.
420 * Because of a concurrent writer, we might not find an existing entry. 442 * Because of a concurrent writer, we might not find an existing entry.
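
Mechanically, this conversion annotates every pointer a lockless reader may chase as __rcu, makes writers holding peers.lock load through rcu_dereference_protected(..., lockdep_is_held(&peers.lock)), and publishes with rcu_assign_pointer(), falling back to RCU_INIT_POINTER() where the store is not the publishing one and so needs no memory barrier. A minimal kernel-style sketch of that writer-side idiom (struct foo, foo_lock and foo_head are hypothetical stand-ins for inet_peer, peers.lock and peers.root):

struct foo {
	struct foo __rcu *next;
	int key;
};

static DEFINE_SPINLOCK(foo_lock);
static struct foo __rcu *foo_head;

static void foo_insert_front(struct foo *nf)
{
	struct foo *first;

	spin_lock(&foo_lock);
	first = rcu_dereference_protected(foo_head,
					  lockdep_is_held(&foo_lock));
	/* nf is not reachable by readers yet: a plain store is enough */
	RCU_INIT_POINTER(nf->next, first);
	/* publishing store: the barrier orders nf's init before the link */
	rcu_assign_pointer(foo_head, nf);
	spin_unlock(&foo_lock);
}
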
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b7c41654dde5..168440834ade 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -116,11 +116,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
116 struct ip4_create_arg *arg = a; 116 struct ip4_create_arg *arg = a;
117 117
118 qp = container_of(q, struct ipq, q); 118 qp = container_of(q, struct ipq, q);
119 return (qp->id == arg->iph->id && 119 return qp->id == arg->iph->id &&
120 qp->saddr == arg->iph->saddr && 120 qp->saddr == arg->iph->saddr &&
121 qp->daddr == arg->iph->daddr && 121 qp->daddr == arg->iph->daddr &&
122 qp->protocol == arg->iph->protocol && 122 qp->protocol == arg->iph->protocol &&
123 qp->user == arg->user); 123 qp->user == arg->user;
124} 124}
125 125
126/* Memory Tracking Functions. */ 126/* Memory Tracking Functions. */
@@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
542 /* If the first fragment is fragmented itself, we split 542 /* If the first fragment is fragmented itself, we split
543 * it to two chunks: the first with data and paged part 543 * it to two chunks: the first with data and paged part
544 * and the second, holding only fragments. */ 544 * and the second, holding only fragments. */
545 if (skb_has_frags(head)) { 545 if (skb_has_frag_list(head)) {
546 struct sk_buff *clone; 546 struct sk_buff *clone;
547 int i, plen = 0; 547 int i, plen = 0;
548 548
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 35c93e8b6a46..70ff77f02eee 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/gre.h>
47 48
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 49#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -63,13 +64,13 @@
63 We cannot track such dead loops during route installation, 64 We cannot track such dead loops during route installation,
64 it is infeasible task. The most general solutions would be 65 it is infeasible task. The most general solutions would be
65 to keep skb->encapsulation counter (sort of local ttl), 66 to keep skb->encapsulation counter (sort of local ttl),
66 and silently drop packet when it expires. It is the best 67 solution, but it supposes maintaining a new variable in ALL
67 solution, but it supposes maintaining a new variable in ALL 68 skb, even if no tunneling is used.
68 skb, even if no tunneling is used. 69 skb, even if no tunneling is used.
69 70
70 Current solution: HARD_TX_LOCK lock breaks dead loops. 71 Current solution: xmit_recursion breaks dead loops. This is a percpu
71 72 counter, since when we enter the first ndo_xmit(), cpu migration is
72 73 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
73 74
74 2. Networking dead loops would not kill routers, but would really 75 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case, 76 kill network. IP hop limit plays role of "t->recursion" in this case,
@@ -128,7 +129,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev);
128 129
129static int ipgre_net_id __read_mostly; 130static int ipgre_net_id __read_mostly;
130struct ipgre_net { 131struct ipgre_net {
131 struct ip_tunnel *tunnels[4][HASH_SIZE]; 132 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
132 133
133 struct net_device *fb_tunnel_dev; 134 struct net_device *fb_tunnel_dev;
134}; 135};
@@ -158,13 +159,40 @@ struct ipgre_net {
158#define tunnels_l tunnels[1] 159#define tunnels_l tunnels[1]
159#define tunnels_wc tunnels[0] 160#define tunnels_wc tunnels[0]
160/* 161/*
161 * Locking : hash tables are protected by RCU and a spinlock 162 * Locking : hash tables are protected by RCU and RTNL
162 */ 163 */
163static DEFINE_SPINLOCK(ipgre_lock);
164 164
165#define for_each_ip_tunnel_rcu(start) \ 165#define for_each_ip_tunnel_rcu(start) \
166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
167 167
168/* often modified stats are per cpu, other are shared (netdev->stats) */
169struct pcpu_tstats {
170 unsigned long rx_packets;
171 unsigned long rx_bytes;
172 unsigned long tx_packets;
173 unsigned long tx_bytes;
174};
175
176static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
177{
178 struct pcpu_tstats sum = { 0 };
179 int i;
180
181 for_each_possible_cpu(i) {
182 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
183
184 sum.rx_packets += tstats->rx_packets;
185 sum.rx_bytes += tstats->rx_bytes;
186 sum.tx_packets += tstats->tx_packets;
187 sum.tx_bytes += tstats->tx_bytes;
188 }
189 dev->stats.rx_packets = sum.rx_packets;
190 dev->stats.rx_bytes = sum.rx_bytes;
191 dev->stats.tx_packets = sum.tx_packets;
192 dev->stats.tx_bytes = sum.tx_bytes;
193 return &dev->stats;
194}
195
168/* Given src, dst and key, find appropriate for input tunnel. */ 196/* Given src, dst and key, find appropriate for input tunnel. */
169 197
170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, 198static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
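
pcpu_tstats exists so the rx/tx hot paths touch only their own cpu's counters: no atomics and no shared-cacheline bouncing, at the cost of a fold over all cpus in the rare ipgre_get_stats() reader shown above. A userspace analogue of the idea (NCPU slots indexed per worker; the kernel version additionally relies on preemption being disabled while a slot is updated):

#include <stdio.h>

#define NCPU 4

struct tstats { unsigned long rx_packets, rx_bytes; };

/* one slot per cpu; each writer increments only its own slot */
static struct tstats percpu[NCPU];

static struct tstats fold(void)
{
	struct tstats sum = { 0, 0 };
	int i;

	for (i = 0; i < NCPU; i++) {	/* the rare reader pays the loop */
		sum.rx_packets += percpu[i].rx_packets;
		sum.rx_bytes   += percpu[i].rx_bytes;
	}
	return sum;	/* may be slightly stale vs concurrent writers */
}

int main(void)
{
	percpu[0].rx_packets = 2;
	percpu[3].rx_packets = 5;
	printf("%lu packets\n", fold().rx_packets);	/* prints 7 */
	return 0;
}
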
@@ -173,8 +201,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
173{ 201{
174 struct net *net = dev_net(dev); 202 struct net *net = dev_net(dev);
175 int link = dev->ifindex; 203 int link = dev->ifindex;
176 unsigned h0 = HASH(remote); 204 unsigned int h0 = HASH(remote);
177 unsigned h1 = HASH(key); 205 unsigned int h1 = HASH(key);
178 struct ip_tunnel *t, *cand = NULL; 206 struct ip_tunnel *t, *cand = NULL;
179 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 207 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 208 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
@@ -289,13 +317,13 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
289 return NULL; 317 return NULL;
290} 318}
291 319
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 320static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms) 321 struct ip_tunnel_parm *parms)
294{ 322{
295 __be32 remote = parms->iph.daddr; 323 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr; 324 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key; 325 __be32 key = parms->i_key;
298 unsigned h = HASH(key); 326 unsigned int h = HASH(key);
299 int prio = 0; 327 int prio = 0;
300 328
301 if (local) 329 if (local)
@@ -308,7 +336,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
308 return &ign->tunnels[prio][h]; 336 return &ign->tunnels[prio][h];
309} 337}
310 338
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 339static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t) 340 struct ip_tunnel *t)
313{ 341{
314 return __ipgre_bucket(ign, &t->parms); 342 return __ipgre_bucket(ign, &t->parms);
@@ -316,23 +344,22 @@ static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
316 344
317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 345static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318{ 346{
319 struct ip_tunnel **tp = ipgre_bucket(ign, t); 347 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
320 348
321 spin_lock_bh(&ipgre_lock); 349 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
322 t->next = *tp;
323 rcu_assign_pointer(*tp, t); 350 rcu_assign_pointer(*tp, t);
324 spin_unlock_bh(&ipgre_lock);
325} 351}
326 352
327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 353static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
328{ 354{
329 struct ip_tunnel **tp; 355 struct ip_tunnel __rcu **tp;
330 356 struct ip_tunnel *iter;
331 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 357
332 if (t == *tp) { 358 for (tp = ipgre_bucket(ign, t);
333 spin_lock_bh(&ipgre_lock); 359 (iter = rtnl_dereference(*tp)) != NULL;
334 *tp = t->next; 360 tp = &iter->next) {
335 spin_unlock_bh(&ipgre_lock); 361 if (t == iter) {
362 rcu_assign_pointer(*tp, t->next);
336 break; 363 break;
337 } 364 }
338 } 365 }
@@ -346,10 +373,13 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
346 __be32 local = parms->iph.saddr; 373 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key; 374 __be32 key = parms->i_key;
348 int link = parms->link; 375 int link = parms->link;
349 struct ip_tunnel *t, **tp; 376 struct ip_tunnel *t;
377 struct ip_tunnel __rcu **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 378 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351 379
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) 380 for (tp = __ipgre_bucket(ign, parms);
381 (t = rtnl_dereference(*tp)) != NULL;
382 tp = &t->next)
353 if (local == t->parms.iph.saddr && 383 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr && 384 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key && 385 key == t->parms.i_key &&
@@ -360,7 +390,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
360 return t; 390 return t;
361} 391}
362 392
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 393static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
364 struct ip_tunnel_parm *parms, int create) 394 struct ip_tunnel_parm *parms, int create)
365{ 395{
366 struct ip_tunnel *t, *nt; 396 struct ip_tunnel *t, *nt;
@@ -582,7 +612,7 @@ static int ipgre_rcv(struct sk_buff *skb)
582 if ((tunnel = ipgre_tunnel_lookup(skb->dev, 612 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
583 iph->saddr, iph->daddr, key, 613 iph->saddr, iph->daddr, key,
584 gre_proto))) { 614 gre_proto))) {
585 struct net_device_stats *stats = &tunnel->dev->stats; 615 struct pcpu_tstats *tstats;
586 616
587 secpath_reset(skb); 617 secpath_reset(skb);
588 618
@@ -606,22 +636,22 @@ static int ipgre_rcv(struct sk_buff *skb)
606 /* Looped back packet, drop it! */ 636 /* Looped back packet, drop it! */
607 if (skb_rtable(skb)->fl.iif == 0) 637 if (skb_rtable(skb)->fl.iif == 0)
608 goto drop; 638 goto drop;
609 stats->multicast++; 639 tunnel->dev->stats.multicast++;
610 skb->pkt_type = PACKET_BROADCAST; 640 skb->pkt_type = PACKET_BROADCAST;
611 } 641 }
612#endif 642#endif
613 643
614 if (((flags&GRE_CSUM) && csum) || 644 if (((flags&GRE_CSUM) && csum) ||
615 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 645 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
616 stats->rx_crc_errors++; 646 tunnel->dev->stats.rx_crc_errors++;
617 stats->rx_errors++; 647 tunnel->dev->stats.rx_errors++;
618 goto drop; 648 goto drop;
619 } 649 }
620 if (tunnel->parms.i_flags&GRE_SEQ) { 650 if (tunnel->parms.i_flags&GRE_SEQ) {
621 if (!(flags&GRE_SEQ) || 651 if (!(flags&GRE_SEQ) ||
622 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 652 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
623 stats->rx_fifo_errors++; 653 tunnel->dev->stats.rx_fifo_errors++;
624 stats->rx_errors++; 654 tunnel->dev->stats.rx_errors++;
625 goto drop; 655 goto drop;
626 } 656 }
627 tunnel->i_seqno = seqno + 1; 657 tunnel->i_seqno = seqno + 1;
@@ -630,8 +660,8 @@ static int ipgre_rcv(struct sk_buff *skb)
630 /* Warning: All skb pointers will be invalidated! */ 660 /* Warning: All skb pointers will be invalidated! */
631 if (tunnel->dev->type == ARPHRD_ETHER) { 661 if (tunnel->dev->type == ARPHRD_ETHER) {
632 if (!pskb_may_pull(skb, ETH_HLEN)) { 662 if (!pskb_may_pull(skb, ETH_HLEN)) {
633 stats->rx_length_errors++; 663 tunnel->dev->stats.rx_length_errors++;
634 stats->rx_errors++; 664 tunnel->dev->stats.rx_errors++;
635 goto drop; 665 goto drop;
636 } 666 }
637 667
@@ -640,14 +670,19 @@ static int ipgre_rcv(struct sk_buff *skb)
640 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 670 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
641 } 671 }
642 672
643 skb_tunnel_rx(skb, tunnel->dev); 673 tstats = this_cpu_ptr(tunnel->dev->tstats);
674 tstats->rx_packets++;
675 tstats->rx_bytes += skb->len;
676
677 __skb_tunnel_rx(skb, tunnel->dev);
644 678
645 skb_reset_network_header(skb); 679 skb_reset_network_header(skb);
646 ipgre_ecn_decapsulate(iph, skb); 680 ipgre_ecn_decapsulate(iph, skb);
647 681
648 netif_rx(skb); 682 netif_rx(skb);
683
649 rcu_read_unlock(); 684 rcu_read_unlock();
650 return(0); 685 return 0;
651 } 686 }
652 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 687 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
653 688
@@ -655,20 +690,19 @@ drop:
655 rcu_read_unlock(); 690 rcu_read_unlock();
656drop_nolock: 691drop_nolock:
657 kfree_skb(skb); 692 kfree_skb(skb);
658 return(0); 693 return 0;
659} 694}
660 695
661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 696static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
662{ 697{
663 struct ip_tunnel *tunnel = netdev_priv(dev); 698 struct ip_tunnel *tunnel = netdev_priv(dev);
664 struct net_device_stats *stats = &dev->stats; 699 struct pcpu_tstats *tstats;
665 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
666 struct iphdr *old_iph = ip_hdr(skb); 700 struct iphdr *old_iph = ip_hdr(skb);
667 struct iphdr *tiph; 701 struct iphdr *tiph;
668 u8 tos; 702 u8 tos;
669 __be16 df; 703 __be16 df;
670 struct rtable *rt; /* Route to the other host */ 704 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */ 705 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */ 706 struct iphdr *iph; /* Our new IP header */
673 unsigned int max_headroom; /* The extra header space needed */ 707 unsigned int max_headroom; /* The extra header space needed */
674 int gre_hlen; 708 int gre_hlen;
@@ -690,7 +724,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
690 /* NBMA tunnel */ 724 /* NBMA tunnel */
691 725
692 if (skb_dst(skb) == NULL) { 726 if (skb_dst(skb) == NULL) {
693 stats->tx_fifo_errors++; 727 dev->stats.tx_fifo_errors++;
694 goto tx_error; 728 goto tx_error;
695 } 729 }
696 730
@@ -736,14 +770,20 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
736 } 770 }
737 771
738 { 772 {
739 struct flowi fl = { .oif = tunnel->parms.link, 773 struct flowi fl = {
740 .nl_u = { .ip4_u = 774 .oif = tunnel->parms.link,
741 { .daddr = dst, 775 .nl_u = {
742 .saddr = tiph->saddr, 776 .ip4_u = {
743 .tos = RT_TOS(tos) } }, 777 .daddr = dst,
744 .proto = IPPROTO_GRE }; 778 .saddr = tiph->saddr,
779 .tos = RT_TOS(tos)
780 }
781 },
782 .proto = IPPROTO_GRE
783 };
745 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 785 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
746 stats->tx_carrier_errors++; 786 dev->stats.tx_carrier_errors++;
747 goto tx_error; 787 goto tx_error;
748 } 788 }
749 } 789 }
@@ -751,7 +791,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
751 791
752 if (tdev == dev) { 792 if (tdev == dev) {
753 ip_rt_put(rt); 793 ip_rt_put(rt);
754 stats->collisions++; 794 dev->stats.collisions++;
755 goto tx_error; 795 goto tx_error;
756 } 796 }
757 797
@@ -814,7 +854,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
814 dev->needed_headroom = max_headroom; 854 dev->needed_headroom = max_headroom;
815 if (!new_skb) { 855 if (!new_skb) {
816 ip_rt_put(rt); 856 ip_rt_put(rt);
817 txq->tx_dropped++; 857 dev->stats.tx_dropped++;
818 dev_kfree_skb(skb); 858 dev_kfree_skb(skb);
819 return NETDEV_TX_OK; 859 return NETDEV_TX_OK;
820 } 860 }
@@ -881,15 +921,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
881 } 921 }
882 922
883 nf_reset(skb); 923 nf_reset(skb);
884 924 tstats = this_cpu_ptr(dev->tstats);
885 IPTUNNEL_XMIT(); 925 __IPTUNNEL_XMIT(tstats, &dev->stats);
886 return NETDEV_TX_OK; 926 return NETDEV_TX_OK;
887 927
888tx_error_icmp: 928tx_error_icmp:
889 dst_link_failure(skb); 929 dst_link_failure(skb);
890 930
891tx_error: 931tx_error:
892 stats->tx_errors++; 932 dev->stats.tx_errors++;
893 dev_kfree_skb(skb); 933 dev_kfree_skb(skb);
894 return NETDEV_TX_OK; 934 return NETDEV_TX_OK;
895} 935}
@@ -909,13 +949,19 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
909 /* Guess output device to choose reasonable mtu and needed_headroom */ 949 /* Guess output device to choose reasonable mtu and needed_headroom */
910 950
911 if (iph->daddr) { 951 if (iph->daddr) {
912 struct flowi fl = { .oif = tunnel->parms.link, 952 struct flowi fl = {
913 .nl_u = { .ip4_u = 953 .oif = tunnel->parms.link,
914 { .daddr = iph->daddr, 954 .nl_u = {
915 .saddr = iph->saddr, 955 .ip4_u = {
916 .tos = RT_TOS(iph->tos) } }, 956 .daddr = iph->daddr,
917 .proto = IPPROTO_GRE }; 957 .saddr = iph->saddr,
958 .tos = RT_TOS(iph->tos)
959 }
960 },
961 .proto = IPPROTO_GRE
962 };
918 struct rtable *rt; 963 struct rtable *rt;
964
919 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 965 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
920 tdev = rt->dst.dev; 966 tdev = rt->dst.dev;
921 ip_rt_put(rt); 967 ip_rt_put(rt);
@@ -1012,7 +1058,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1012 break; 1058 break;
1013 } 1059 }
1014 } else { 1060 } else {
1015 unsigned nflags = 0; 1061 unsigned int nflags = 0;
1016 1062
1017 t = netdev_priv(dev); 1063 t = netdev_priv(dev);
1018 1064
@@ -1026,6 +1072,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1026 break; 1072 break;
1027 } 1073 }
1028 ipgre_tunnel_unlink(ign, t); 1074 ipgre_tunnel_unlink(ign, t);
1075 synchronize_net();
1029 t->parms.iph.saddr = p.iph.saddr; 1076 t->parms.iph.saddr = p.iph.saddr;
1030 t->parms.iph.daddr = p.iph.daddr; 1077 t->parms.iph.daddr = p.iph.daddr;
1031 t->parms.i_key = p.i_key; 1078 t->parms.i_key = p.i_key;
@@ -1125,7 +1172,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1125 1172
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 1173static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127 unsigned short type, 1174 unsigned short type,
1128 const void *daddr, const void *saddr, unsigned len) 1175 const void *daddr, const void *saddr, unsigned int len)
1129{ 1176{
1130 struct ip_tunnel *t = netdev_priv(dev); 1177 struct ip_tunnel *t = netdev_priv(dev);
1131 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1178 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
@@ -1167,13 +1214,19 @@ static int ipgre_open(struct net_device *dev)
1167 struct ip_tunnel *t = netdev_priv(dev); 1214 struct ip_tunnel *t = netdev_priv(dev);
1168 1215
1169 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1216 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1170 struct flowi fl = { .oif = t->parms.link, 1217 struct flowi fl = {
1171 .nl_u = { .ip4_u = 1218 .oif = t->parms.link,
1172 { .daddr = t->parms.iph.daddr, 1219 .nl_u = {
1173 .saddr = t->parms.iph.saddr, 1220 .ip4_u = {
1174 .tos = RT_TOS(t->parms.iph.tos) } }, 1221 .daddr = t->parms.iph.daddr,
1175 .proto = IPPROTO_GRE }; 1222 .saddr = t->parms.iph.saddr,
1223 .tos = RT_TOS(t->parms.iph.tos)
1224 }
1225 },
1226 .proto = IPPROTO_GRE
1227 };
1176 struct rtable *rt; 1228 struct rtable *rt;
1229
1177 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1230 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1178 return -EADDRNOTAVAIL; 1231 return -EADDRNOTAVAIL;
1179 dev = rt->dst.dev; 1232 dev = rt->dst.dev;
@@ -1193,10 +1246,8 @@ static int ipgre_close(struct net_device *dev)
1193 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 1246 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1194 struct in_device *in_dev; 1247 struct in_device *in_dev;
1195 in_dev = inetdev_by_index(dev_net(dev), t->mlink); 1248 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1196 if (in_dev) { 1249 if (in_dev)
1197 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1250 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1198 in_dev_put(in_dev);
1199 }
1200 } 1251 }
1201 return 0; 1252 return 0;
1202} 1253}
@@ -1213,12 +1264,19 @@ static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_start_xmit = ipgre_tunnel_xmit, 1264 .ndo_start_xmit = ipgre_tunnel_xmit,
1214 .ndo_do_ioctl = ipgre_tunnel_ioctl, 1265 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1215 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1266 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1267 .ndo_get_stats = ipgre_get_stats,
1216}; 1268};
1217 1269
1270static void ipgre_dev_free(struct net_device *dev)
1271{
1272 free_percpu(dev->tstats);
1273 free_netdev(dev);
1274}
1275
1218static void ipgre_tunnel_setup(struct net_device *dev) 1276static void ipgre_tunnel_setup(struct net_device *dev)
1219{ 1277{
1220 dev->netdev_ops = &ipgre_netdev_ops; 1278 dev->netdev_ops = &ipgre_netdev_ops;
1221 dev->destructor = free_netdev; 1279 dev->destructor = ipgre_dev_free;
1222 1280
1223 dev->type = ARPHRD_IPGRE; 1281 dev->type = ARPHRD_IPGRE;
1224 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1282 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
@@ -1256,6 +1314,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
1256 } else 1314 } else
1257 dev->header_ops = &ipgre_header_ops; 1315 dev->header_ops = &ipgre_header_ops;
1258 1316
1317 dev->tstats = alloc_percpu(struct pcpu_tstats);
1318 if (!dev->tstats)
1319 return -ENOMEM;
1320
1259 return 0; 1321 return 0;
1260} 1322}
1261 1323
@@ -1263,7 +1325,6 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1263{ 1325{
1264 struct ip_tunnel *tunnel = netdev_priv(dev); 1326 struct ip_tunnel *tunnel = netdev_priv(dev);
1265 struct iphdr *iph = &tunnel->parms.iph; 1327 struct iphdr *iph = &tunnel->parms.iph;
1266 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1267 1328
1268 tunnel->dev = dev; 1329 tunnel->dev = dev;
1269 strcpy(tunnel->parms.name, dev->name); 1330 strcpy(tunnel->parms.name, dev->name);
@@ -1274,14 +1335,12 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1274 tunnel->hlen = sizeof(struct iphdr) + 4; 1335 tunnel->hlen = sizeof(struct iphdr) + 4;
1275 1336
1276 dev_hold(dev); 1337 dev_hold(dev);
1277 ign->tunnels_wc[0] = tunnel;
1278} 1338}
1279 1339
1280 1340
1281static const struct net_protocol ipgre_protocol = { 1341static const struct gre_protocol ipgre_protocol = {
1282 .handler = ipgre_rcv, 1342 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err, 1343 .err_handler = ipgre_err,
1284 .netns_ok = 1,
1285}; 1344};
1286 1345
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) 1346static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1291,11 +1350,13 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1291 for (prio = 0; prio < 4; prio++) { 1350 for (prio = 0; prio < 4; prio++) {
1292 int h; 1351 int h;
1293 for (h = 0; h < HASH_SIZE; h++) { 1352 for (h = 0; h < HASH_SIZE; h++) {
1294 struct ip_tunnel *t = ign->tunnels[prio][h]; 1353 struct ip_tunnel *t;
1354
1355 t = rtnl_dereference(ign->tunnels[prio][h]);
1295 1356
1296 while (t != NULL) { 1357 while (t != NULL) {
1297 unregister_netdevice_queue(t->dev, head); 1358 unregister_netdevice_queue(t->dev, head);
1298 t = t->next; 1359 t = rtnl_dereference(t->next);
1299 } 1360 }
1300 } 1361 }
1301 } 1362 }
@@ -1320,10 +1381,12 @@ static int __net_init ipgre_init_net(struct net *net)
1320 if ((err = register_netdev(ign->fb_tunnel_dev))) 1381 if ((err = register_netdev(ign->fb_tunnel_dev)))
1321 goto err_reg_dev; 1382 goto err_reg_dev;
1322 1383
1384 rcu_assign_pointer(ign->tunnels_wc[0],
1385 netdev_priv(ign->fb_tunnel_dev));
1323 return 0; 1386 return 0;
1324 1387
1325err_reg_dev: 1388err_reg_dev:
1326 free_netdev(ign->fb_tunnel_dev); 1389 ipgre_dev_free(ign->fb_tunnel_dev);
1327err_alloc_dev: 1390err_alloc_dev:
1328 return err; 1391 return err;
1329} 1392}
@@ -1441,6 +1504,10 @@ static int ipgre_tap_init(struct net_device *dev)
1441 1504
1442 ipgre_tunnel_bind_dev(dev); 1505 ipgre_tunnel_bind_dev(dev);
1443 1506
1507 dev->tstats = alloc_percpu(struct pcpu_tstats);
1508 if (!dev->tstats)
1509 return -ENOMEM;
1510
1444 return 0; 1511 return 0;
1445} 1512}
1446 1513
@@ -1451,6 +1518,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
1451 .ndo_set_mac_address = eth_mac_addr, 1518 .ndo_set_mac_address = eth_mac_addr,
1452 .ndo_validate_addr = eth_validate_addr, 1519 .ndo_validate_addr = eth_validate_addr,
1453 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1520 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1521 .ndo_get_stats = ipgre_get_stats,
1454}; 1522};
1455 1523
1456static void ipgre_tap_setup(struct net_device *dev) 1524static void ipgre_tap_setup(struct net_device *dev)
@@ -1459,7 +1527,7 @@ static void ipgre_tap_setup(struct net_device *dev)
1459 ether_setup(dev); 1527 ether_setup(dev);
1460 1528
1461 dev->netdev_ops = &ipgre_tap_netdev_ops; 1529 dev->netdev_ops = &ipgre_tap_netdev_ops;
1462 dev->destructor = free_netdev; 1530 dev->destructor = ipgre_dev_free;
1463 1531
1464 dev->iflink = 0; 1532 dev->iflink = 0;
1465 dev->features |= NETIF_F_NETNS_LOCAL; 1533 dev->features |= NETIF_F_NETNS_LOCAL;
@@ -1487,6 +1555,10 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla
1487 if (!tb[IFLA_MTU]) 1555 if (!tb[IFLA_MTU])
1488 dev->mtu = mtu; 1556 dev->mtu = mtu;
1489 1557
1558 /* Can use a lockless transmit, unless we generate output sequences */
1559 if (!(nt->parms.o_flags & GRE_SEQ))
1560 dev->features |= NETIF_F_LLTX;
1561
1490 err = register_netdevice(dev); 1562 err = register_netdevice(dev);
1491 if (err) 1563 if (err)
1492 goto out; 1564 goto out;
@@ -1522,7 +1594,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1522 t = nt; 1594 t = nt;
1523 1595
1524 if (dev->type != ARPHRD_ETHER) { 1596 if (dev->type != ARPHRD_ETHER) {
1525 unsigned nflags = 0; 1597 unsigned int nflags = 0;
1526 1598
1527 if (ipv4_is_multicast(p.iph.daddr)) 1599 if (ipv4_is_multicast(p.iph.daddr))
1528 nflags = IFF_BROADCAST; 1600 nflags = IFF_BROADCAST;
@@ -1663,7 +1735,7 @@ static int __init ipgre_init(void)
1663 if (err < 0) 1735 if (err < 0)
1664 return err; 1736 return err;
1665 1737
1666 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); 1738 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1667 if (err < 0) { 1739 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1740 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed; 1741 goto add_proto_failed;
@@ -1683,7 +1755,7 @@ out:
1683tap_ops_failed: 1755tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops); 1756 rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed: 1757rtnl_link_failed:
1686 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1758 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1687add_proto_failed: 1759add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops); 1760 unregister_pernet_device(&ipgre_net_ops);
1689 goto out; 1761 goto out;
@@ -1693,7 +1765,7 @@ static void __exit ipgre_fini(void)
1693{ 1765{
1694 rtnl_link_unregister(&ipgre_tap_ops); 1766 rtnl_link_unregister(&ipgre_tap_ops);
1695 rtnl_link_unregister(&ipgre_link_ops); 1767 rtnl_link_unregister(&ipgre_link_ops);
1696 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1768 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1697 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1769 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698 unregister_pernet_device(&ipgre_net_ops); 1770 unregister_pernet_device(&ipgre_net_ops);
1699} 1771}
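
Taken together, the ip_gre.c hunks above remove the global lock and the per-queue counters in favour of RCU-protected hash chains, per-cpu pcpu_tstats counters (released by the new ipgre_dev_free() destructor), registration through the GRE demultiplexer (gre_add_protocol() with GREPROTO_CISCO) rather than inet_add_protocol(IPPROTO_GRE), and lockless transmit (NETIF_F_LLTX) whenever no output sequence numbers are generated. A minimal sketch of the locking split these hunks rely on, with stand-in names: lookups run under rcu_read_lock() (cf. for_each_ip_tunnel_rcu()), while link/unlink happens under RTNL, which is also why the ioctl path can simply call synchronize_net() before rewriting a live tunnel's addresses.

    #include <linux/types.h>
    #include <linux/rcupdate.h>
    #include <linux/rtnetlink.h>

    struct tun {                            /* stand-in for struct ip_tunnel */
            struct tun __rcu *next;
            __be32 daddr;
    };

    static struct tun __rcu *bucket;        /* one hash chain */

    /* Data path: runs under rcu_read_lock(), no spinlock taken. */
    static struct tun *tun_lookup(__be32 daddr)
    {
            struct tun *t;

            for (t = rcu_dereference(bucket); t; t = rcu_dereference(t->next))
                    if (t->daddr == daddr)
                            return t;
            return NULL;
    }

    /* Control path: RTNL is the only writer lock, as in ipgre_tunnel_link(). */
    static void tun_link(struct tun *t)
    {
            ASSERT_RTNL();
            rcu_assign_pointer(t->next, rtnl_dereference(bucket));
            rcu_assign_pointer(bucket, t);
    }

The same split is applied almost verbatim to ipip.c further down.
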
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ba9836c488ed..1906fa35860c 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -466,7 +466,7 @@ error:
466 } 466 }
467 return -EINVAL; 467 return -EINVAL;
468} 468}
469 469EXPORT_SYMBOL(ip_options_compile);
470 470
471/* 471/*
472 * Undo all the changes done by ip_options_compile(). 472 * Undo all the changes done by ip_options_compile().
@@ -646,3 +646,4 @@ int ip_options_rcv_srr(struct sk_buff *skb)
646 } 646 }
647 return 0; 647 return 0;
648} 648}
649EXPORT_SYMBOL(ip_options_rcv_srr);
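
The ip_options.c hunks only widen the API surface: ip_options_compile() and ip_options_rcv_srr() gain EXPORT_SYMBOL so modular code can reuse the option parser. A hypothetical module-side caller could look like the sketch below; the helper is illustrative only (nothing in the patch adds such a user), and the optlen setup mirrors what ip_rcv_options() does before invoking the parser.

    #include <linux/string.h>
    #include <net/ip.h>

    /* Hypothetical module helper: re-run option parsing on a received skb.
     * IPCB(skb)->opt is the per-skb option scratch area; optlen is derived
     * from the IP header length, exactly as in ip_rcv_options().
     */
    static bool ipopts_valid(struct net *net, struct sk_buff *skb)
    {
            struct ip_options *opt = &IPCB(skb)->opt;

            memset(opt, 0, sizeof(*opt));
            opt->optlen = ip_hdrlen(skb) - sizeof(struct iphdr);
            return ip_options_compile(net, opt, skb) == 0;
    }
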
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7649d7750075..439d2a34ee44 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
487 * LATER: this step can be merged to real generation of fragments, 487 * LATER: this step can be merged to real generation of fragments,
488 * we can switch to copy when see the first bad fragment. 488 * we can switch to copy when see the first bad fragment.
489 */ 489 */
490 if (skb_has_frags(skb)) { 490 if (skb_has_frag_list(skb)) {
491 struct sk_buff *frag, *frag2; 491 struct sk_buff *frag, *frag2;
492 int first_len = skb_pagelen(skb); 492 int first_len = skb_pagelen(skb);
493 493
@@ -844,10 +844,9 @@ int ip_append_data(struct sock *sk,
844 inet->cork.length = 0; 844 inet->cork.length = 0;
845 sk->sk_sndmsg_page = NULL; 845 sk->sk_sndmsg_page = NULL;
846 sk->sk_sndmsg_off = 0; 846 sk->sk_sndmsg_off = 0;
847 if ((exthdrlen = rt->dst.header_len) != 0) { 847 exthdrlen = rt->dst.header_len;
848 length += exthdrlen; 848 length += exthdrlen;
849 transhdrlen += exthdrlen; 849 transhdrlen += exthdrlen;
850 }
851 } else { 850 } else {
852 rt = (struct rtable *)inet->cork.dst; 851 rt = (struct rtable *)inet->cork.dst;
853 if (inet->cork.flags & IPCORK_OPT) 852 if (inet->cork.flags & IPCORK_OPT)
@@ -934,16 +933,19 @@ alloc_new_skb:
934 !(rt->dst.dev->features&NETIF_F_SG)) 933 !(rt->dst.dev->features&NETIF_F_SG))
935 alloclen = mtu; 934 alloclen = mtu;
936 else 935 else
937 alloclen = datalen + fragheaderlen; 936 alloclen = fraglen;
938 937
939 /* The last fragment gets additional space at tail. 938 /* The last fragment gets additional space at tail.
940 * Note, with MSG_MORE we overallocate on fragments, 939 * Note, with MSG_MORE we overallocate on fragments,
941 * because we have no idea what fragment will be 940 * because we have no idea what fragment will be
942 * the last. 941 * the last.
943 */ 942 */
944 if (datalen == length + fraggap) 943 if (datalen == length + fraggap) {
945 alloclen += rt->dst.trailer_len; 944 alloclen += rt->dst.trailer_len;
946 945 /* make sure mtu is not reached */
946 if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
947 datalen -= ALIGN(rt->dst.trailer_len, 8);
948 }
947 if (transhdrlen) { 949 if (transhdrlen) {
948 skb = sock_alloc_send_skb(sk, 950 skb = sock_alloc_send_skb(sk,
949 alloclen + hh_len + 15, 951 alloclen + hh_len + 15,
@@ -960,7 +962,7 @@ alloc_new_skb:
960 else 962 else
961 /* only the initial fragment is 963 /* only the initial fragment is
962 time stamped */ 964 time stamped */
963 ipc->shtx.flags = 0; 965 ipc->tx_flags = 0;
964 } 966 }
965 if (skb == NULL) 967 if (skb == NULL)
966 goto error; 968 goto error;
@@ -971,7 +973,7 @@ alloc_new_skb:
971 skb->ip_summed = csummode; 973 skb->ip_summed = csummode;
972 skb->csum = 0; 974 skb->csum = 0;
973 skb_reserve(skb, hh_len); 975 skb_reserve(skb, hh_len);
974 *skb_tx(skb) = ipc->shtx; 976 skb_shinfo(skb)->tx_flags = ipc->tx_flags;
975 977
976 /* 978 /*
977 * Find where to start putting bytes. 979 * Find where to start putting bytes.
@@ -1391,7 +1393,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1391 1393
1392 daddr = ipc.addr = rt->rt_src; 1394 daddr = ipc.addr = rt->rt_src;
1393 ipc.opt = NULL; 1395 ipc.opt = NULL;
1394 ipc.shtx.flags = 0; 1396 ipc.tx_flags = 0;
1395 1397
1396 if (replyopts.opt.optlen) { 1398 if (replyopts.opt.optlen) {
1397 ipc.opt = &replyopts.opt; 1399 ipc.opt = &replyopts.opt;
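
Besides renaming skb_has_frags() to the more accurate skb_has_frag_list() and moving the timestamp flags from the ipcm cookie's shtx member to skb_shinfo(skb)->tx_flags, the ip_output.c changes add a last-fragment clamp in ip_append_data(): when the route's destination requires a trailer (rt->dst.trailer_len, e.g. for IPsec transforms), a full-sized final fragment plus that trailer would exceed the MTU, so datalen is reduced by the 8-byte-aligned trailer length. The standalone program below checks the arithmetic with hypothetical sizes (1500-byte MTU, 20-byte IP header, 16-byte trailer); it is a sketch of the rule, not kernel code.

    #include <stdio.h>

    #define ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            unsigned int mtu = 1500, fragheaderlen = 20, trailer_len = 16;
            unsigned int datalen = mtu - fragheaderlen;        /* 1480 */

            /* the hunk's condition and correction */
            if (datalen > mtu - fragheaderlen - trailer_len)
                    datalen -= ALIGN(trailer_len, 8);          /* -> 1464 */

            printf("%u payload + %u header + %u trailer = %u (mtu %u)\n",
                   datalen, fragheaderlen, trailer_len,
                   datalen + fragheaderlen + trailer_len, mtu);
            return 0;
    }

Without the clamp the last fragment would have put 20 + 1480 + 16 = 1516 bytes on the wire; with it, 20 + 1464 + 16 = 1500 fits exactly.
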
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 64b70ad162e3..3948c86e59ca 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -238,7 +238,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
238 but receiver should be enough clever f.e. to forward mtrace requests, 238 but receiver should be enough clever f.e. to forward mtrace requests,
239 sent to multicast group to reach destination designated router. 239 sent to multicast group to reach destination designated router.
240 */ 240 */
241struct ip_ra_chain *ip_ra_chain; 241struct ip_ra_chain __rcu *ip_ra_chain;
242static DEFINE_SPINLOCK(ip_ra_lock); 242static DEFINE_SPINLOCK(ip_ra_lock);
243 243
244 244
@@ -253,7 +253,8 @@ static void ip_ra_destroy_rcu(struct rcu_head *head)
253int ip_ra_control(struct sock *sk, unsigned char on, 253int ip_ra_control(struct sock *sk, unsigned char on,
254 void (*destructor)(struct sock *)) 254 void (*destructor)(struct sock *))
255{ 255{
256 struct ip_ra_chain *ra, *new_ra, **rap; 256 struct ip_ra_chain *ra, *new_ra;
257 struct ip_ra_chain __rcu **rap;
257 258
258 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) 259 if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
259 return -EINVAL; 260 return -EINVAL;
@@ -261,7 +262,10 @@ int ip_ra_control(struct sock *sk, unsigned char on,
261 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 262 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
262 263
263 spin_lock_bh(&ip_ra_lock); 264 spin_lock_bh(&ip_ra_lock);
264 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { 265 for (rap = &ip_ra_chain;
266 (ra = rcu_dereference_protected(*rap,
267 lockdep_is_held(&ip_ra_lock))) != NULL;
268 rap = &ra->next) {
265 if (ra->sk == sk) { 269 if (ra->sk == sk) {
266 if (on) { 270 if (on) {
267 spin_unlock_bh(&ip_ra_lock); 271 spin_unlock_bh(&ip_ra_lock);
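
The __rcu annotations in ip_sockglue.c let sparse verify that every access to the ip_ra_chain list states its protection. Because this particular walk mutates the list, it runs under ip_ra_lock rather than rcu_read_lock(), and rcu_dereference_protected(..., lockdep_is_held(&ip_ra_lock)) both documents that and lets lockdep check it at runtime. A compact restatement of the writer-side idiom, using stand-in names:

    #include <linux/rcupdate.h>
    #include <linux/spinlock.h>

    struct ra_node {
            struct ra_node __rcu *next;
    };

    static struct ra_node __rcu *ra_head;
    static DEFINE_SPINLOCK(ra_lock);

    static void ra_unlink(struct ra_node *victim)      /* writer side */
    {
            struct ra_node __rcu **rap;
            struct ra_node *ra;

            spin_lock_bh(&ra_lock);
            for (rap = &ra_head;
                 (ra = rcu_dereference_protected(*rap,
                                lockdep_is_held(&ra_lock))) != NULL;
                 rap = &ra->next) {
                    if (ra == victim) {
                            /* unlink; readers already in flight keep
                             * seeing the node until a grace period ends */
                            rcu_assign_pointer(*rap, victim->next);
                            break;
                    }
            }
            spin_unlock_bh(&ra_lock);
    }
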
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ec036731a70b..cd300aaee78f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -122,31 +122,59 @@
122 122
123static int ipip_net_id __read_mostly; 123static int ipip_net_id __read_mostly;
124struct ipip_net { 124struct ipip_net {
125 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 125 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
126 struct ip_tunnel *tunnels_r[HASH_SIZE]; 126 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
127 struct ip_tunnel *tunnels_l[HASH_SIZE]; 127 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
128 struct ip_tunnel *tunnels_wc[1]; 128 struct ip_tunnel __rcu *tunnels_wc[1];
129 struct ip_tunnel **tunnels[4]; 129 struct ip_tunnel __rcu **tunnels[4];
130 130
131 struct net_device *fb_tunnel_dev; 131 struct net_device *fb_tunnel_dev;
132}; 132};
133 133
134static void ipip_tunnel_init(struct net_device *dev); 134static int ipip_tunnel_init(struct net_device *dev);
135static void ipip_tunnel_setup(struct net_device *dev); 135static void ipip_tunnel_setup(struct net_device *dev);
136static void ipip_dev_free(struct net_device *dev);
136 137
137/* 138/*
138 * Locking : hash tables are protected by RCU and a spinlock 139 * Locking : hash tables are protected by RCU and RTNL
139 */ 140 */
140static DEFINE_SPINLOCK(ipip_lock);
141 141
142#define for_each_ip_tunnel_rcu(start) \ 142#define for_each_ip_tunnel_rcu(start) \
143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144 144
145/* often modified stats are per cpu, other are shared (netdev->stats) */
146struct pcpu_tstats {
147 unsigned long rx_packets;
148 unsigned long rx_bytes;
149 unsigned long tx_packets;
150 unsigned long tx_bytes;
151};
152
153static struct net_device_stats *ipip_get_stats(struct net_device *dev)
154{
155 struct pcpu_tstats sum = { 0 };
156 int i;
157
158 for_each_possible_cpu(i) {
159 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
160
161 sum.rx_packets += tstats->rx_packets;
162 sum.rx_bytes += tstats->rx_bytes;
163 sum.tx_packets += tstats->tx_packets;
164 sum.tx_bytes += tstats->tx_bytes;
165 }
166 dev->stats.rx_packets = sum.rx_packets;
167 dev->stats.rx_bytes = sum.rx_bytes;
168 dev->stats.tx_packets = sum.tx_packets;
169 dev->stats.tx_bytes = sum.tx_bytes;
170 return &dev->stats;
171}
172
145static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 173static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
146 __be32 remote, __be32 local) 174 __be32 remote, __be32 local)
147{ 175{
148 unsigned h0 = HASH(remote); 176 unsigned int h0 = HASH(remote);
149 unsigned h1 = HASH(local); 177 unsigned int h1 = HASH(local);
150 struct ip_tunnel *t; 178 struct ip_tunnel *t;
151 struct ipip_net *ipn = net_generic(net, ipip_net_id); 179 struct ipip_net *ipn = net_generic(net, ipip_net_id);
152 180
@@ -169,12 +197,12 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
169 return NULL; 197 return NULL;
170} 198}
171 199
172static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, 200static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
173 struct ip_tunnel_parm *parms) 201 struct ip_tunnel_parm *parms)
174{ 202{
175 __be32 remote = parms->iph.daddr; 203 __be32 remote = parms->iph.daddr;
176 __be32 local = parms->iph.saddr; 204 __be32 local = parms->iph.saddr;
177 unsigned h = 0; 205 unsigned int h = 0;
178 int prio = 0; 206 int prio = 0;
179 207
180 if (remote) { 208 if (remote) {
@@ -188,7 +216,7 @@ static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
188 return &ipn->tunnels[prio][h]; 216 return &ipn->tunnels[prio][h];
189} 217}
190 218
191static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, 219static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
192 struct ip_tunnel *t) 220 struct ip_tunnel *t)
193{ 221{
194 return __ipip_bucket(ipn, &t->parms); 222 return __ipip_bucket(ipn, &t->parms);
@@ -196,13 +224,14 @@ static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
196 224
197static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) 225static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
198{ 226{
199 struct ip_tunnel **tp; 227 struct ip_tunnel __rcu **tp;
200 228 struct ip_tunnel *iter;
201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { 229
202 if (t == *tp) { 230 for (tp = ipip_bucket(ipn, t);
203 spin_lock_bh(&ipip_lock); 231 (iter = rtnl_dereference(*tp)) != NULL;
204 *tp = t->next; 232 tp = &iter->next) {
205 spin_unlock_bh(&ipip_lock); 233 if (t == iter) {
234 rcu_assign_pointer(*tp, t->next);
206 break; 235 break;
207 } 236 }
208 } 237 }
@@ -210,12 +239,10 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
210 239
211static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) 240static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
212{ 241{
213 struct ip_tunnel **tp = ipip_bucket(ipn, t); 242 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
214 243
215 spin_lock_bh(&ipip_lock); 244 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
216 t->next = *tp;
217 rcu_assign_pointer(*tp, t); 245 rcu_assign_pointer(*tp, t);
218 spin_unlock_bh(&ipip_lock);
219} 246}
220 247
221static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 248static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -223,12 +250,15 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
223{ 250{
224 __be32 remote = parms->iph.daddr; 251 __be32 remote = parms->iph.daddr;
225 __be32 local = parms->iph.saddr; 252 __be32 local = parms->iph.saddr;
226 struct ip_tunnel *t, **tp, *nt; 253 struct ip_tunnel *t, *nt;
254 struct ip_tunnel __rcu **tp;
227 struct net_device *dev; 255 struct net_device *dev;
228 char name[IFNAMSIZ]; 256 char name[IFNAMSIZ];
229 struct ipip_net *ipn = net_generic(net, ipip_net_id); 257 struct ipip_net *ipn = net_generic(net, ipip_net_id);
230 258
231 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { 259 for (tp = __ipip_bucket(ipn, parms);
260 (t = rtnl_dereference(*tp)) != NULL;
261 tp = &t->next) {
232 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 262 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
233 return t; 263 return t;
234 } 264 }
@@ -238,7 +268,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
238 if (parms->name[0]) 268 if (parms->name[0])
239 strlcpy(name, parms->name, IFNAMSIZ); 269 strlcpy(name, parms->name, IFNAMSIZ);
240 else 270 else
241 sprintf(name, "tunl%%d"); 271 strcpy(name, "tunl%d");
242 272
243 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); 273 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
244 if (dev == NULL) 274 if (dev == NULL)
@@ -254,7 +284,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
254 nt = netdev_priv(dev); 284 nt = netdev_priv(dev);
255 nt->parms = *parms; 285 nt->parms = *parms;
256 286
257 ipip_tunnel_init(dev); 287 if (ipip_tunnel_init(dev) < 0)
288 goto failed_free;
258 289
259 if (register_netdevice(dev) < 0) 290 if (register_netdevice(dev) < 0)
260 goto failed_free; 291 goto failed_free;
@@ -264,20 +295,19 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
264 return nt; 295 return nt;
265 296
266failed_free: 297failed_free:
267 free_netdev(dev); 298 ipip_dev_free(dev);
268 return NULL; 299 return NULL;
269} 300}
270 301
302/* called with RTNL */
271static void ipip_tunnel_uninit(struct net_device *dev) 303static void ipip_tunnel_uninit(struct net_device *dev)
272{ 304{
273 struct net *net = dev_net(dev); 305 struct net *net = dev_net(dev);
274 struct ipip_net *ipn = net_generic(net, ipip_net_id); 306 struct ipip_net *ipn = net_generic(net, ipip_net_id);
275 307
276 if (dev == ipn->fb_tunnel_dev) { 308 if (dev == ipn->fb_tunnel_dev)
277 spin_lock_bh(&ipip_lock); 309 rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
278 ipn->tunnels_wc[0] = NULL; 310 else
279 spin_unlock_bh(&ipip_lock);
280 } else
281 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 311 ipip_tunnel_unlink(ipn, netdev_priv(dev));
282 dev_put(dev); 312 dev_put(dev);
283} 313}
@@ -359,8 +389,10 @@ static int ipip_rcv(struct sk_buff *skb)
359 const struct iphdr *iph = ip_hdr(skb); 389 const struct iphdr *iph = ip_hdr(skb);
360 390
361 rcu_read_lock(); 391 rcu_read_lock();
362 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), 392 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
363 iph->saddr, iph->daddr)) != NULL) { 393 if (tunnel != NULL) {
394 struct pcpu_tstats *tstats;
395
364 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 396 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
365 rcu_read_unlock(); 397 rcu_read_unlock();
366 kfree_skb(skb); 398 kfree_skb(skb);
@@ -374,10 +406,16 @@ static int ipip_rcv(struct sk_buff *skb)
374 skb->protocol = htons(ETH_P_IP); 406 skb->protocol = htons(ETH_P_IP);
375 skb->pkt_type = PACKET_HOST; 407 skb->pkt_type = PACKET_HOST;
376 408
377 skb_tunnel_rx(skb, tunnel->dev); 409 tstats = this_cpu_ptr(tunnel->dev->tstats);
410 tstats->rx_packets++;
411 tstats->rx_bytes += skb->len;
412
413 __skb_tunnel_rx(skb, tunnel->dev);
378 414
379 ipip_ecn_decapsulate(iph, skb); 415 ipip_ecn_decapsulate(iph, skb);
416
380 netif_rx(skb); 417 netif_rx(skb);
418
381 rcu_read_unlock(); 419 rcu_read_unlock();
382 return 0; 420 return 0;
383 } 421 }
@@ -394,13 +432,12 @@ static int ipip_rcv(struct sk_buff *skb)
394static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 432static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
395{ 433{
396 struct ip_tunnel *tunnel = netdev_priv(dev); 434 struct ip_tunnel *tunnel = netdev_priv(dev);
397 struct net_device_stats *stats = &dev->stats; 435 struct pcpu_tstats *tstats;
398 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
399 struct iphdr *tiph = &tunnel->parms.iph; 436 struct iphdr *tiph = &tunnel->parms.iph;
400 u8 tos = tunnel->parms.iph.tos; 437 u8 tos = tunnel->parms.iph.tos;
401 __be16 df = tiph->frag_off; 438 __be16 df = tiph->frag_off;
402 struct rtable *rt; /* Route to the other host */ 439 struct rtable *rt; /* Route to the other host */
403 struct net_device *tdev; /* Device to other host */ 440 struct net_device *tdev; /* Device to other host */
404 struct iphdr *old_iph = ip_hdr(skb); 441 struct iphdr *old_iph = ip_hdr(skb);
405 struct iphdr *iph; /* Our new IP header */ 442 struct iphdr *iph; /* Our new IP header */
406 unsigned int max_headroom; /* The extra header space needed */ 443 unsigned int max_headroom; /* The extra header space needed */
@@ -410,13 +447,13 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
410 if (skb->protocol != htons(ETH_P_IP)) 447 if (skb->protocol != htons(ETH_P_IP))
411 goto tx_error; 448 goto tx_error;
412 449
413 if (tos&1) 450 if (tos & 1)
414 tos = old_iph->tos; 451 tos = old_iph->tos;
415 452
416 if (!dst) { 453 if (!dst) {
417 /* NBMA tunnel */ 454 /* NBMA tunnel */
418 if ((rt = skb_rtable(skb)) == NULL) { 455 if ((rt = skb_rtable(skb)) == NULL) {
419 stats->tx_fifo_errors++; 456 dev->stats.tx_fifo_errors++;
420 goto tx_error; 457 goto tx_error;
421 } 458 }
422 if ((dst = rt->rt_gateway) == 0) 459 if ((dst = rt->rt_gateway) == 0)
@@ -424,14 +461,20 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
424 } 461 }
425 462
426 { 463 {
427 struct flowi fl = { .oif = tunnel->parms.link, 464 struct flowi fl = {
428 .nl_u = { .ip4_u = 465 .oif = tunnel->parms.link,
429 { .daddr = dst, 466 .nl_u = {
430 .saddr = tiph->saddr, 467 .ip4_u = {
431 .tos = RT_TOS(tos) } }, 468 .daddr = dst,
432 .proto = IPPROTO_IPIP }; 469 .saddr = tiph->saddr,
470 .tos = RT_TOS(tos)
471 }
472 },
473 .proto = IPPROTO_IPIP
474 };
475
433 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 476 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
434 stats->tx_carrier_errors++; 477 dev->stats.tx_carrier_errors++;
435 goto tx_error_icmp; 478 goto tx_error_icmp;
436 } 479 }
437 } 480 }
@@ -439,7 +482,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
439 482
440 if (tdev == dev) { 483 if (tdev == dev) {
441 ip_rt_put(rt); 484 ip_rt_put(rt);
442 stats->collisions++; 485 dev->stats.collisions++;
443 goto tx_error; 486 goto tx_error;
444 } 487 }
445 488
@@ -449,7 +492,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
449 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 492 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
450 493
451 if (mtu < 68) { 494 if (mtu < 68) {
452 stats->collisions++; 495 dev->stats.collisions++;
453 ip_rt_put(rt); 496 ip_rt_put(rt);
454 goto tx_error; 497 goto tx_error;
455 } 498 }
@@ -485,7 +528,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
485 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 528 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
486 if (!new_skb) { 529 if (!new_skb) {
487 ip_rt_put(rt); 530 ip_rt_put(rt);
488 txq->tx_dropped++; 531 dev->stats.tx_dropped++;
489 dev_kfree_skb(skb); 532 dev_kfree_skb(skb);
490 return NETDEV_TX_OK; 533 return NETDEV_TX_OK;
491 } 534 }
@@ -522,14 +565,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
522 iph->ttl = old_iph->ttl; 565 iph->ttl = old_iph->ttl;
523 566
524 nf_reset(skb); 567 nf_reset(skb);
525 568 tstats = this_cpu_ptr(dev->tstats);
526 IPTUNNEL_XMIT(); 569 __IPTUNNEL_XMIT(tstats, &dev->stats);
527 return NETDEV_TX_OK; 570 return NETDEV_TX_OK;
528 571
529tx_error_icmp: 572tx_error_icmp:
530 dst_link_failure(skb); 573 dst_link_failure(skb);
531tx_error: 574tx_error:
532 stats->tx_errors++; 575 dev->stats.tx_errors++;
533 dev_kfree_skb(skb); 576 dev_kfree_skb(skb);
534 return NETDEV_TX_OK; 577 return NETDEV_TX_OK;
535} 578}
@@ -544,13 +587,19 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
544 iph = &tunnel->parms.iph; 587 iph = &tunnel->parms.iph;
545 588
546 if (iph->daddr) { 589 if (iph->daddr) {
547 struct flowi fl = { .oif = tunnel->parms.link, 590 struct flowi fl = {
548 .nl_u = { .ip4_u = 591 .oif = tunnel->parms.link,
549 { .daddr = iph->daddr, 592 .nl_u = {
550 .saddr = iph->saddr, 593 .ip4_u = {
551 .tos = RT_TOS(iph->tos) } }, 594 .daddr = iph->daddr,
552 .proto = IPPROTO_IPIP }; 595 .saddr = iph->saddr,
596 .tos = RT_TOS(iph->tos)
597 }
598 },
599 .proto = IPPROTO_IPIP
600 };
553 struct rtable *rt; 601 struct rtable *rt;
602
554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 603 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
555 tdev = rt->dst.dev; 604 tdev = rt->dst.dev;
556 ip_rt_put(rt); 605 ip_rt_put(rt);
@@ -627,6 +676,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
627 } 676 }
628 t = netdev_priv(dev); 677 t = netdev_priv(dev);
629 ipip_tunnel_unlink(ipn, t); 678 ipip_tunnel_unlink(ipn, t);
679 synchronize_net();
630 t->parms.iph.saddr = p.iph.saddr; 680 t->parms.iph.saddr = p.iph.saddr;
631 t->parms.iph.daddr = p.iph.daddr; 681 t->parms.iph.daddr = p.iph.daddr;
632 memcpy(dev->dev_addr, &p.iph.saddr, 4); 682 memcpy(dev->dev_addr, &p.iph.saddr, 4);
@@ -696,13 +746,19 @@ static const struct net_device_ops ipip_netdev_ops = {
696 .ndo_start_xmit = ipip_tunnel_xmit, 746 .ndo_start_xmit = ipip_tunnel_xmit,
697 .ndo_do_ioctl = ipip_tunnel_ioctl, 747 .ndo_do_ioctl = ipip_tunnel_ioctl,
698 .ndo_change_mtu = ipip_tunnel_change_mtu, 748 .ndo_change_mtu = ipip_tunnel_change_mtu,
699 749 .ndo_get_stats = ipip_get_stats,
700}; 750};
701 751
752static void ipip_dev_free(struct net_device *dev)
753{
754 free_percpu(dev->tstats);
755 free_netdev(dev);
756}
757
702static void ipip_tunnel_setup(struct net_device *dev) 758static void ipip_tunnel_setup(struct net_device *dev)
703{ 759{
704 dev->netdev_ops = &ipip_netdev_ops; 760 dev->netdev_ops = &ipip_netdev_ops;
705 dev->destructor = free_netdev; 761 dev->destructor = ipip_dev_free;
706 762
707 dev->type = ARPHRD_TUNNEL; 763 dev->type = ARPHRD_TUNNEL;
708 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 764 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -711,10 +767,11 @@ static void ipip_tunnel_setup(struct net_device *dev)
711 dev->iflink = 0; 767 dev->iflink = 0;
712 dev->addr_len = 4; 768 dev->addr_len = 4;
713 dev->features |= NETIF_F_NETNS_LOCAL; 769 dev->features |= NETIF_F_NETNS_LOCAL;
770 dev->features |= NETIF_F_LLTX;
714 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 771 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
715} 772}
716 773
717static void ipip_tunnel_init(struct net_device *dev) 774static int ipip_tunnel_init(struct net_device *dev)
718{ 775{
719 struct ip_tunnel *tunnel = netdev_priv(dev); 776 struct ip_tunnel *tunnel = netdev_priv(dev);
720 777
@@ -725,9 +782,15 @@ static void ipip_tunnel_init(struct net_device *dev)
725 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 782 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
726 783
727 ipip_tunnel_bind_dev(dev); 784 ipip_tunnel_bind_dev(dev);
785
786 dev->tstats = alloc_percpu(struct pcpu_tstats);
787 if (!dev->tstats)
788 return -ENOMEM;
789
790 return 0;
728} 791}
729 792
730static void __net_init ipip_fb_tunnel_init(struct net_device *dev) 793static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
731{ 794{
732 struct ip_tunnel *tunnel = netdev_priv(dev); 795 struct ip_tunnel *tunnel = netdev_priv(dev);
733 struct iphdr *iph = &tunnel->parms.iph; 796 struct iphdr *iph = &tunnel->parms.iph;
@@ -740,11 +803,16 @@ static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
740 iph->protocol = IPPROTO_IPIP; 803 iph->protocol = IPPROTO_IPIP;
741 iph->ihl = 5; 804 iph->ihl = 5;
742 805
806 dev->tstats = alloc_percpu(struct pcpu_tstats);
807 if (!dev->tstats)
808 return -ENOMEM;
809
743 dev_hold(dev); 810 dev_hold(dev);
744 ipn->tunnels_wc[0] = tunnel; 811 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
812 return 0;
745} 813}
746 814
747static struct xfrm_tunnel ipip_handler = { 815static struct xfrm_tunnel ipip_handler __read_mostly = {
748 .handler = ipip_rcv, 816 .handler = ipip_rcv,
749 .err_handler = ipip_err, 817 .err_handler = ipip_err,
750 .priority = 1, 818 .priority = 1,
@@ -760,11 +828,12 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
760 for (prio = 1; prio < 4; prio++) { 828 for (prio = 1; prio < 4; prio++) {
761 int h; 829 int h;
762 for (h = 0; h < HASH_SIZE; h++) { 830 for (h = 0; h < HASH_SIZE; h++) {
763 struct ip_tunnel *t = ipn->tunnels[prio][h]; 831 struct ip_tunnel *t;
764 832
833 t = rtnl_dereference(ipn->tunnels[prio][h]);
765 while (t != NULL) { 834 while (t != NULL) {
766 unregister_netdevice_queue(t->dev, head); 835 unregister_netdevice_queue(t->dev, head);
767 t = t->next; 836 t = rtnl_dereference(t->next);
768 } 837 }
769 } 838 }
770 } 839 }
@@ -789,7 +858,9 @@ static int __net_init ipip_init_net(struct net *net)
789 } 858 }
790 dev_net_set(ipn->fb_tunnel_dev, net); 859 dev_net_set(ipn->fb_tunnel_dev, net);
791 860
792 ipip_fb_tunnel_init(ipn->fb_tunnel_dev); 861 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
862 if (err)
863 goto err_reg_dev;
793 864
794 if ((err = register_netdev(ipn->fb_tunnel_dev))) 865 if ((err = register_netdev(ipn->fb_tunnel_dev)))
795 goto err_reg_dev; 866 goto err_reg_dev;
@@ -797,7 +868,7 @@ static int __net_init ipip_init_net(struct net *net)
797 return 0; 868 return 0;
798 869
799err_reg_dev: 870err_reg_dev:
800 free_netdev(ipn->fb_tunnel_dev); 871 ipip_dev_free(ipn->fb_tunnel_dev);
801err_alloc_dev: 872err_alloc_dev:
802 /* nothing */ 873 /* nothing */
803 return err; 874 return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 179fcab866fc..86dd5691af46 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -75,7 +75,7 @@ struct mr_table {
75 struct net *net; 75 struct net *net;
76#endif 76#endif
77 u32 id; 77 u32 id;
78 struct sock *mroute_sk; 78 struct sock __rcu *mroute_sk;
79 struct timer_list ipmr_expire_timer; 79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue; 80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES]; 81 struct list_head mfc_cache_array[MFC_LINES];
@@ -98,7 +98,7 @@ struct ipmr_result {
98}; 98};
99 99
100/* Big lock, protecting vif table, mrt cache and mroute socket state. 100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock. 101 * Note that the changes are semaphored via rtnl_lock.
102 */ 102 */
103 103
104static DEFINE_RWLOCK(mrt_lock); 104static DEFINE_RWLOCK(mrt_lock);
@@ -113,11 +113,11 @@ static DEFINE_RWLOCK(mrt_lock);
113static DEFINE_SPINLOCK(mfc_unres_lock); 113static DEFINE_SPINLOCK(mfc_unres_lock);
114 114
115/* We return to original Alan's scheme. Hash table of resolved 115/* We return to original Alan's scheme. Hash table of resolved
116 entries is changed only in process context and protected 116 * entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected 117 * with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock. 118 * with strong spinlock mfc_unres_lock.
119 119 *
120 In this case data path is free of exclusive locks at all. 120 * In this case data path is free of exclusive locks at all.
121 */ 121 */
122 122
123static struct kmem_cache *mrt_cachep __read_mostly; 123static struct kmem_cache *mrt_cachep __read_mostly;
@@ -396,9 +396,9 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
396 set_fs(KERNEL_DS); 396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs); 398 set_fs(oldfs);
399 } else 399 } else {
400 err = -EOPNOTSUPP; 400 err = -EOPNOTSUPP;
401 401 }
402 dev = NULL; 402 dev = NULL;
403 403
404 if (err == 0 && 404 if (err == 0 &&
@@ -495,7 +495,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
495 dev->iflink = 0; 495 dev->iflink = 0;
496 496
497 rcu_read_lock(); 497 rcu_read_lock();
498 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { 498 in_dev = __in_dev_get_rcu(dev);
499 if (!in_dev) {
499 rcu_read_unlock(); 500 rcu_read_unlock();
500 goto failure; 501 goto failure;
501 } 502 }
@@ -552,9 +553,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
552 mrt->mroute_reg_vif_num = -1; 553 mrt->mroute_reg_vif_num = -1;
553#endif 554#endif
554 555
555 if (vifi+1 == mrt->maxvif) { 556 if (vifi + 1 == mrt->maxvif) {
556 int tmp; 557 int tmp;
557 for (tmp=vifi-1; tmp>=0; tmp--) { 558
559 for (tmp = vifi - 1; tmp >= 0; tmp--) {
558 if (VIF_EXISTS(mrt, tmp)) 560 if (VIF_EXISTS(mrt, tmp))
559 break; 561 break;
560 } 562 }
@@ -565,25 +567,33 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
565 567
566 dev_set_allmulti(dev, -1); 568 dev_set_allmulti(dev, -1);
567 569
568 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { 570 in_dev = __in_dev_get_rtnl(dev);
571 if (in_dev) {
569 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 572 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
570 ip_rt_multicast_event(in_dev); 573 ip_rt_multicast_event(in_dev);
571 } 574 }
572 575
573 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) 576 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
574 unregister_netdevice_queue(dev, head); 577 unregister_netdevice_queue(dev, head);
575 578
576 dev_put(dev); 579 dev_put(dev);
577 return 0; 580 return 0;
578} 581}
579 582
580static inline void ipmr_cache_free(struct mfc_cache *c) 583static void ipmr_cache_free_rcu(struct rcu_head *head)
581{ 584{
585 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
586
582 kmem_cache_free(mrt_cachep, c); 587 kmem_cache_free(mrt_cachep, c);
583} 588}
584 589
590static inline void ipmr_cache_free(struct mfc_cache *c)
591{
592 call_rcu(&c->rcu, ipmr_cache_free_rcu);
593}
594
585/* Destroy an unresolved cache entry, killing queued skbs 595/* Destroy an unresolved cache entry, killing queued skbs
586 and reporting error to netlink readers. 596 * and reporting error to netlink readers.
587 */ 597 */
588 598
589static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 599static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
@@ -605,8 +615,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
605 memset(&e->msg, 0, sizeof(e->msg)); 615 memset(&e->msg, 0, sizeof(e->msg));
606 616
607 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 617 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
608 } else 618 } else {
609 kfree_skb(skb); 619 kfree_skb(skb);
620 }
610 } 621 }
611 622
612 ipmr_cache_free(c); 623 ipmr_cache_free(c);
@@ -724,13 +735,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
724 case 0: 735 case 0:
725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 736 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 737 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
727 if (dev && dev->ip_ptr == NULL) { 738 if (dev && __in_dev_get_rtnl(dev) == NULL) {
728 dev_put(dev); 739 dev_put(dev);
729 return -EADDRNOTAVAIL; 740 return -EADDRNOTAVAIL;
730 } 741 }
731 } else 742 } else {
732 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 743 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
733 744 }
734 if (!dev) 745 if (!dev)
735 return -EADDRNOTAVAIL; 746 return -EADDRNOTAVAIL;
736 err = dev_set_allmulti(dev, 1); 747 err = dev_set_allmulti(dev, 1);
@@ -743,16 +754,16 @@ static int vif_add(struct net *net, struct mr_table *mrt,
743 return -EINVAL; 754 return -EINVAL;
744 } 755 }
745 756
746 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { 757 in_dev = __in_dev_get_rtnl(dev);
758 if (!in_dev) {
747 dev_put(dev); 759 dev_put(dev);
748 return -EADDRNOTAVAIL; 760 return -EADDRNOTAVAIL;
749 } 761 }
750 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 762 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
751 ip_rt_multicast_event(in_dev); 763 ip_rt_multicast_event(in_dev);
752 764
753 /* 765 /* Fill in the VIF structures */
754 * Fill in the VIF structures 766
755 */
756 v->rate_limit = vifc->vifc_rate_limit; 767 v->rate_limit = vifc->vifc_rate_limit;
757 v->local = vifc->vifc_lcl_addr.s_addr; 768 v->local = vifc->vifc_lcl_addr.s_addr;
758 v->remote = vifc->vifc_rmt_addr.s_addr; 769 v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -765,14 +776,14 @@ static int vif_add(struct net *net, struct mr_table *mrt,
765 v->pkt_in = 0; 776 v->pkt_in = 0;
766 v->pkt_out = 0; 777 v->pkt_out = 0;
767 v->link = dev->ifindex; 778 v->link = dev->ifindex;
768 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 779 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
769 v->link = dev->iflink; 780 v->link = dev->iflink;
770 781
771 /* And finish update writing critical data */ 782 /* And finish update writing critical data */
772 write_lock_bh(&mrt_lock); 783 write_lock_bh(&mrt_lock);
773 v->dev = dev; 784 v->dev = dev;
774#ifdef CONFIG_IP_PIMSM 785#ifdef CONFIG_IP_PIMSM
775 if (v->flags&VIFF_REGISTER) 786 if (v->flags & VIFF_REGISTER)
776 mrt->mroute_reg_vif_num = vifi; 787 mrt->mroute_reg_vif_num = vifi;
777#endif 788#endif
778 if (vifi+1 > mrt->maxvif) 789 if (vifi+1 > mrt->maxvif)
@@ -781,6 +792,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
781 return 0; 792 return 0;
782} 793}
783 794
795/* called with rcu_read_lock() */
784static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 796static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
785 __be32 origin, 797 __be32 origin,
786 __be32 mcastgrp) 798 __be32 mcastgrp)
@@ -788,7 +800,7 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
788 int line = MFC_HASH(mcastgrp, origin); 800 int line = MFC_HASH(mcastgrp, origin);
789 struct mfc_cache *c; 801 struct mfc_cache *c;
790 802
791 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { 803 list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
792 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) 804 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
793 return c; 805 return c;
794 } 806 }
@@ -801,19 +813,20 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
801static struct mfc_cache *ipmr_cache_alloc(void) 813static struct mfc_cache *ipmr_cache_alloc(void)
802{ 814{
803 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 815 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
804 if (c == NULL) 816
805 return NULL; 817 if (c)
806 c->mfc_un.res.minvif = MAXVIFS; 818 c->mfc_un.res.minvif = MAXVIFS;
807 return c; 819 return c;
808} 820}
809 821
810static struct mfc_cache *ipmr_cache_alloc_unres(void) 822static struct mfc_cache *ipmr_cache_alloc_unres(void)
811{ 823{
812 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 824 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
813 if (c == NULL) 825
814 return NULL; 826 if (c) {
815 skb_queue_head_init(&c->mfc_un.unres.unresolved); 827 skb_queue_head_init(&c->mfc_un.unres.unresolved);
816 c->mfc_un.unres.expires = jiffies + 10*HZ; 828 c->mfc_un.unres.expires = jiffies + 10*HZ;
829 }
817 return c; 830 return c;
818} 831}
819 832
@@ -827,17 +840,15 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
827 struct sk_buff *skb; 840 struct sk_buff *skb;
828 struct nlmsgerr *e; 841 struct nlmsgerr *e;
829 842
830 /* 843 /* Play the pending entries through our router */
831 * Play the pending entries through our router
832 */
833 844
834 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 845 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
835 if (ip_hdr(skb)->version == 0) { 846 if (ip_hdr(skb)->version == 0) {
836 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 847 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
837 848
838 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 849 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
839 nlh->nlmsg_len = (skb_tail_pointer(skb) - 850 nlh->nlmsg_len = skb_tail_pointer(skb) -
840 (u8 *)nlh); 851 (u8 *)nlh;
841 } else { 852 } else {
842 nlh->nlmsg_type = NLMSG_ERROR; 853 nlh->nlmsg_type = NLMSG_ERROR;
843 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 854 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -848,8 +859,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
848 } 859 }
849 860
850 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 861 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
851 } else 862 } else {
852 ip_mr_forward(net, mrt, skb, c, 0); 863 ip_mr_forward(net, mrt, skb, c, 0);
864 }
853 } 865 }
854} 866}
855 867
@@ -867,6 +879,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
867 const int ihl = ip_hdrlen(pkt); 879 const int ihl = ip_hdrlen(pkt);
868 struct igmphdr *igmp; 880 struct igmphdr *igmp;
869 struct igmpmsg *msg; 881 struct igmpmsg *msg;
882 struct sock *mroute_sk;
870 int ret; 883 int ret;
871 884
872#ifdef CONFIG_IP_PIMSM 885#ifdef CONFIG_IP_PIMSM
@@ -882,9 +895,9 @@ static int ipmr_cache_report(struct mr_table *mrt,
882#ifdef CONFIG_IP_PIMSM 895#ifdef CONFIG_IP_PIMSM
883 if (assert == IGMPMSG_WHOLEPKT) { 896 if (assert == IGMPMSG_WHOLEPKT) {
884 /* Ugly, but we have no choice with this interface. 897 /* Ugly, but we have no choice with this interface.
885 Duplicate old header, fix ihl, length etc. 898 * Duplicate old header, fix ihl, length etc.
886 And all this only to mangle msg->im_msgtype and 899 * And all this only to mangle msg->im_msgtype and
887 to set msg->im_mbz to "mbz" :-) 900 * to set msg->im_mbz to "mbz" :-)
888 */ 901 */
889 skb_push(skb, sizeof(struct iphdr)); 902 skb_push(skb, sizeof(struct iphdr));
890 skb_reset_network_header(skb); 903 skb_reset_network_header(skb);
@@ -901,39 +914,38 @@ static int ipmr_cache_report(struct mr_table *mrt,
901#endif 914#endif
902 { 915 {
903 916
904 /* 917 /* Copy the IP header */
905 * Copy the IP header
906 */
907 918
908 skb->network_header = skb->tail; 919 skb->network_header = skb->tail;
909 skb_put(skb, ihl); 920 skb_put(skb, ihl);
910 skb_copy_to_linear_data(skb, pkt->data, ihl); 921 skb_copy_to_linear_data(skb, pkt->data, ihl);
911 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ 922 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
912 msg = (struct igmpmsg *)skb_network_header(skb); 923 msg = (struct igmpmsg *)skb_network_header(skb);
913 msg->im_vif = vifi; 924 msg->im_vif = vifi;
914 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 925 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
915 926
916 /* 927 /* Add our header */
917 * Add our header
918 */
919 928
920 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 929 igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
921 igmp->type = 930 igmp->type =
922 msg->im_msgtype = assert; 931 msg->im_msgtype = assert;
923 igmp->code = 0; 932 igmp->code = 0;
924 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 933 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
925 skb->transport_header = skb->network_header; 934 skb->transport_header = skb->network_header;
926 } 935 }
927 936
928 if (mrt->mroute_sk == NULL) { 937 rcu_read_lock();
938 mroute_sk = rcu_dereference(mrt->mroute_sk);
939 if (mroute_sk == NULL) {
940 rcu_read_unlock();
929 kfree_skb(skb); 941 kfree_skb(skb);
930 return -EINVAL; 942 return -EINVAL;
931 } 943 }
932 944
933 /* 945 /* Deliver to mrouted */
934 * Deliver to mrouted 946
935 */ 947 ret = sock_queue_rcv_skb(mroute_sk, skb);
936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); 948 rcu_read_unlock();
937 if (ret < 0) { 949 if (ret < 0) {
938 if (net_ratelimit()) 950 if (net_ratelimit())
939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 951 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
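
Note how the ipmr.c hunks pick a different accessor for mrt->mroute_sk in each context, all three of which appear in this file: rcu_dereference() on the packet path (as in the report path just above), rtnl_dereference() where RTNL is the writer lock (setsockopt and mrtsock_destruct()), and rcu_dereference_raw() where the pointer value is only compared against the calling socket, never followed. A hedged digest of the three flavors, with a stand-in table type:

    #include <linux/rtnetlink.h>
    #include <net/sock.h>

    /* Stand-in for struct mr_table; only the field that matters here. */
    struct table { struct sock __rcu *mroute_sk; };

    /* Data path: must sit inside rcu_read_lock()/unlock(). */
    static int deliver(struct table *mrt, struct sk_buff *skb)
    {
            struct sock *sk;
            int ret = -EINVAL;

            rcu_read_lock();
            sk = rcu_dereference(mrt->mroute_sk);
            if (sk)
                    ret = sock_queue_rcv_skb(sk, skb);
            rcu_read_unlock();
            return ret;
    }

    /* Control path: RTNL held, no RCU read section needed. */
    static void detach(struct table *mrt)
    {
            ASSERT_RTNL();
            rcu_assign_pointer(mrt->mroute_sk, NULL);
    }

    /* Identity test only: the pointer is compared, never dereferenced,
     * so rcu_dereference_raw() silences sparse without claiming any
     * protection. */
    static bool is_owner(struct table *mrt, struct sock *sk)
    {
            return sk == rcu_dereference_raw(mrt->mroute_sk);
    }
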
@@ -965,9 +977,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
965 } 977 }
966 978
967 if (!found) { 979 if (!found) {
968 /* 980 /* Create a new entry if allowable */
969 * Create a new entry if allowable
970 */
971 981
972 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 982 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
973 (c = ipmr_cache_alloc_unres()) == NULL) { 983 (c = ipmr_cache_alloc_unres()) == NULL) {
@@ -977,16 +987,14 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
977 return -ENOBUFS; 987 return -ENOBUFS;
978 } 988 }
979 989
980 /* 990 /* Fill in the new cache entry */
981 * Fill in the new cache entry 991
982 */
983 c->mfc_parent = -1; 992 c->mfc_parent = -1;
984 c->mfc_origin = iph->saddr; 993 c->mfc_origin = iph->saddr;
985 c->mfc_mcastgrp = iph->daddr; 994 c->mfc_mcastgrp = iph->daddr;
986 995
987 /* 996 /* Reflect first query at mrouted. */
988 * Reflect first query at mrouted. 997
989 */
990 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 998 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
991 if (err < 0) { 999 if (err < 0) {
992 /* If the report failed throw the cache entry 1000 /* If the report failed throw the cache entry
@@ -1006,10 +1014,9 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1006 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1014 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1007 } 1015 }
1008 1016
1009 /* 1017 /* See if we can append the packet */
1010 * See if we can append the packet 1018
1011 */ 1019 if (c->mfc_un.unres.unresolved.qlen > 3) {
1012 if (c->mfc_un.unres.unresolved.qlen>3) {
1013 kfree_skb(skb); 1020 kfree_skb(skb);
1014 err = -ENOBUFS; 1021 err = -ENOBUFS;
1015 } else { 1022 } else {
@@ -1035,9 +1042,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1035 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { 1042 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1036 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1043 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1037 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1044 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1038 write_lock_bh(&mrt_lock); 1045 list_del_rcu(&c->list);
1039 list_del(&c->list);
1040 write_unlock_bh(&mrt_lock);
1041 1046
1042 ipmr_cache_free(c); 1047 ipmr_cache_free(c);
1043 return 0; 1048 return 0;
@@ -1090,9 +1095,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1090 if (!mrtsock) 1095 if (!mrtsock)
1091 c->mfc_flags |= MFC_STATIC; 1096 c->mfc_flags |= MFC_STATIC;
1092 1097
1093 write_lock_bh(&mrt_lock); 1098 list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
1094 list_add(&c->list, &mrt->mfc_cache_array[line]);
1095 write_unlock_bh(&mrt_lock);
1096 1099
1097 /* 1100 /*
1098 * Check to see if we resolved a queued list. If so we 1101 * Check to see if we resolved a queued list. If so we
@@ -1130,26 +1133,21 @@ static void mroute_clean_tables(struct mr_table *mrt)
1130 LIST_HEAD(list); 1133 LIST_HEAD(list);
1131 struct mfc_cache *c, *next; 1134 struct mfc_cache *c, *next;
1132 1135
1133 /* 1136 /* Shut down all active vif entries */
1134 * Shut down all active vif entries 1137
1135 */
1136 for (i = 0; i < mrt->maxvif; i++) { 1138 for (i = 0; i < mrt->maxvif; i++) {
1137 if (!(mrt->vif_table[i].flags&VIFF_STATIC)) 1139 if (!(mrt->vif_table[i].flags & VIFF_STATIC))
1138 vif_delete(mrt, i, 0, &list); 1140 vif_delete(mrt, i, 0, &list);
1139 } 1141 }
1140 unregister_netdevice_many(&list); 1142 unregister_netdevice_many(&list);
1141 1143
1142 /* 1144 /* Wipe the cache */
1143 * Wipe the cache 1145
1144 */
1145 for (i = 0; i < MFC_LINES; i++) { 1146 for (i = 0; i < MFC_LINES; i++) {
1146 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { 1147 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1147 if (c->mfc_flags&MFC_STATIC) 1148 if (c->mfc_flags & MFC_STATIC)
1148 continue; 1149 continue;
1149 write_lock_bh(&mrt_lock); 1150 list_del_rcu(&c->list);
1150 list_del(&c->list);
1151 write_unlock_bh(&mrt_lock);
1152
1153 ipmr_cache_free(c); 1151 ipmr_cache_free(c);
1154 } 1152 }
1155 } 1153 }
@@ -1164,6 +1162,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
1164 } 1162 }
1165} 1163}
1166 1164
1165/* called from ip_ra_control(), before an RCU grace period,
1166 * we dont need to call synchronize_rcu() here
1167 */
1167static void mrtsock_destruct(struct sock *sk) 1168static void mrtsock_destruct(struct sock *sk)
1168{ 1169{
1169 struct net *net = sock_net(sk); 1170 struct net *net = sock_net(sk);
@@ -1171,13 +1172,9 @@ static void mrtsock_destruct(struct sock *sk)
1171 1172
1172 rtnl_lock(); 1173 rtnl_lock();
1173 ipmr_for_each_table(mrt, net) { 1174 ipmr_for_each_table(mrt, net) {
1174 if (sk == mrt->mroute_sk) { 1175 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1176 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1176 1177 rcu_assign_pointer(mrt->mroute_sk, NULL);
1177 write_lock_bh(&mrt_lock);
1178 mrt->mroute_sk = NULL;
1179 write_unlock_bh(&mrt_lock);
1180
1181 mroute_clean_tables(mrt); 1178 mroute_clean_tables(mrt);
1182 } 1179 }
1183 } 1180 }
@@ -1204,7 +1201,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1204 return -ENOENT; 1201 return -ENOENT;
1205 1202
1206 if (optname != MRT_INIT) { 1203 if (optname != MRT_INIT) {
1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) 1204 if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
1205 !capable(CAP_NET_ADMIN))
1208 return -EACCES; 1206 return -EACCES;
1209 } 1207 }
1210 1208
@@ -1217,23 +1215,20 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1217 return -ENOPROTOOPT; 1215 return -ENOPROTOOPT;
1218 1216
1219 rtnl_lock(); 1217 rtnl_lock();
1220 if (mrt->mroute_sk) { 1218 if (rtnl_dereference(mrt->mroute_sk)) {
1221 rtnl_unlock(); 1219 rtnl_unlock();
1222 return -EADDRINUSE; 1220 return -EADDRINUSE;
1223 } 1221 }
1224 1222
1225 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1223 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226 if (ret == 0) { 1224 if (ret == 0) {
1227 write_lock_bh(&mrt_lock); 1225 rcu_assign_pointer(mrt->mroute_sk, sk);
1228 mrt->mroute_sk = sk;
1229 write_unlock_bh(&mrt_lock);
1230
1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1226 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1232 } 1227 }
1233 rtnl_unlock(); 1228 rtnl_unlock();
1234 return ret; 1229 return ret;
1235 case MRT_DONE: 1230 case MRT_DONE:
1236 if (sk != mrt->mroute_sk) 1231 if (sk != rcu_dereference_raw(mrt->mroute_sk))
1237 return -EACCES; 1232 return -EACCES;
1238 return ip_ra_control(sk, 0, NULL); 1233 return ip_ra_control(sk, 0, NULL);
1239 case MRT_ADD_VIF: 1234 case MRT_ADD_VIF:
@@ -1246,7 +1241,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1246 return -ENFILE; 1241 return -ENFILE;
1247 rtnl_lock(); 1242 rtnl_lock();
1248 if (optname == MRT_ADD_VIF) { 1243 if (optname == MRT_ADD_VIF) {
1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); 1244 ret = vif_add(net, mrt, &vif,
1245 sk == rtnl_dereference(mrt->mroute_sk));
1250 } else { 1246 } else {
1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1247 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1252 } 1248 }
@@ -1267,7 +1263,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1267 if (optname == MRT_DEL_MFC) 1263 if (optname == MRT_DEL_MFC)
1268 ret = ipmr_mfc_delete(mrt, &mfc); 1264 ret = ipmr_mfc_delete(mrt, &mfc);
1269 else 1265 else
1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); 1266 ret = ipmr_mfc_add(net, mrt, &mfc,
1267 sk == rtnl_dereference(mrt->mroute_sk));
1271 rtnl_unlock(); 1268 rtnl_unlock();
1272 return ret; 1269 return ret;
1273 /* 1270 /*
@@ -1276,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1276 case MRT_ASSERT: 1273 case MRT_ASSERT:
1277 { 1274 {
1278 int v; 1275 int v;
1279 if (get_user(v,(int __user *)optval)) 1276 if (get_user(v, (int __user *)optval))
1280 return -EFAULT; 1277 return -EFAULT;
1281 mrt->mroute_do_assert = (v) ? 1 : 0; 1278 mrt->mroute_do_assert = (v) ? 1 : 0;
1282 return 0; 1279 return 0;
@@ -1286,7 +1283,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1286 { 1283 {
1287 int v; 1284 int v;
1288 1285
1289 if (get_user(v,(int __user *)optval)) 1286 if (get_user(v, (int __user *)optval))
1290 return -EFAULT; 1287 return -EFAULT;
1291 v = (v) ? 1 : 0; 1288 v = (v) ? 1 : 0;
1292 1289
@@ -1309,14 +1306,16 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1309 return -EINVAL; 1306 return -EINVAL;
1310 if (get_user(v, (u32 __user *)optval)) 1307 if (get_user(v, (u32 __user *)optval))
1311 return -EFAULT; 1308 return -EFAULT;
1312 if (sk == mrt->mroute_sk)
1313 return -EBUSY;
1314 1309
1315 rtnl_lock(); 1310 rtnl_lock();
1316 ret = 0; 1311 ret = 0;
1317 if (!ipmr_new_table(net, v)) 1312 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1318 ret = -ENOMEM; 1313 ret = -EBUSY;
1319 raw_sk(sk)->ipmr_table = v; 1314 } else {
1315 if (!ipmr_new_table(net, v))
1316 ret = -ENOMEM;
1317 raw_sk(sk)->ipmr_table = v;
1318 }
1320 rtnl_unlock(); 1319 rtnl_unlock();
1321 return ret; 1320 return ret;
1322 } 1321 }
@@ -1347,9 +1346,9 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1347 1346
1348 if (optname != MRT_VERSION && 1347 if (optname != MRT_VERSION &&
1349#ifdef CONFIG_IP_PIMSM 1348#ifdef CONFIG_IP_PIMSM
1350 optname!=MRT_PIM && 1349 optname != MRT_PIM &&
1351#endif 1350#endif
1352 optname!=MRT_ASSERT) 1351 optname != MRT_ASSERT)
1353 return -ENOPROTOOPT; 1352 return -ENOPROTOOPT;
1354 1353
1355 if (get_user(olr, optlen)) 1354 if (get_user(olr, optlen))
@@ -1416,19 +1415,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1416 if (copy_from_user(&sr, arg, sizeof(sr))) 1415 if (copy_from_user(&sr, arg, sizeof(sr)))
1417 return -EFAULT; 1416 return -EFAULT;
1418 1417
1419 read_lock(&mrt_lock); 1418 rcu_read_lock();
1420 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1419 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1421 if (c) { 1420 if (c) {
1422 sr.pktcnt = c->mfc_un.res.pkt; 1421 sr.pktcnt = c->mfc_un.res.pkt;
1423 sr.bytecnt = c->mfc_un.res.bytes; 1422 sr.bytecnt = c->mfc_un.res.bytes;
1424 sr.wrong_if = c->mfc_un.res.wrong_if; 1423 sr.wrong_if = c->mfc_un.res.wrong_if;
1425 read_unlock(&mrt_lock); 1424 rcu_read_unlock();
1426 1425
1427 if (copy_to_user(arg, &sr, sizeof(sr))) 1426 if (copy_to_user(arg, &sr, sizeof(sr)))
1428 return -EFAULT; 1427 return -EFAULT;
1429 return 0; 1428 return 0;
1430 } 1429 }
1431 read_unlock(&mrt_lock); 1430 rcu_read_unlock();
1432 return -EADDRNOTAVAIL; 1431 return -EADDRNOTAVAIL;
1433 default: 1432 default:
1434 return -ENOIOCTLCMD; 1433 return -ENOIOCTLCMD;
@@ -1465,7 +1464,7 @@ static struct notifier_block ip_mr_notifier = {
1465}; 1464};
1466 1465
1467/* 1466/*
1468 * Encapsulate a packet by attaching a valid IPIP header to it. 1467 * Encapsulate a packet by attaching a valid IPIP header to it.
1469 * This avoids tunnel drivers and other mess and gives us the speed so 1468 * This avoids tunnel drivers and other mess and gives us the speed so
1470 * important for multicast video. 1469 * important for multicast video.
1471 */ 1470 */
@@ -1480,7 +1479,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1480 skb_reset_network_header(skb); 1479 skb_reset_network_header(skb);
1481 iph = ip_hdr(skb); 1480 iph = ip_hdr(skb);
1482 1481
1483 iph->version = 4; 1482 iph->version = 4;
1484 iph->tos = old_iph->tos; 1483 iph->tos = old_iph->tos;
1485 iph->ttl = old_iph->ttl; 1484 iph->ttl = old_iph->ttl;
1486 iph->frag_off = 0; 1485 iph->frag_off = 0;
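
For reference, the whole of ip_encap() amounts to pushing a second IPv4 header, protocol field IPPROTO_IPIP, in front of the original datagram. A simplified sketch (assumes sufficient skb headroom; the real function also copies tos/ttl from the inner header, selects the IP ID, and adjusts skb->transport_header):

#include <linux/in.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <net/ip.h>

static void ipip_encap_sketch(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;

	skb_push(skb, sizeof(struct iphdr));	/* room for the outer header */
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version  = 4;
	iph->ihl      = 5;
	iph->frag_off = 0;
	iph->protocol = IPPROTO_IPIP;	/* payload is a complete IP packet */
	iph->saddr    = saddr;
	iph->daddr    = daddr;
	iph->tot_len  = htons(skb->len);
	ip_send_check(iph);		/* outer header checksum */
}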
@@ -1498,7 +1497,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1498 1497
1499static inline int ipmr_forward_finish(struct sk_buff *skb) 1498static inline int ipmr_forward_finish(struct sk_buff *skb)
1500{ 1499{
1501 struct ip_options * opt = &(IPCB(skb)->opt); 1500 struct ip_options *opt = &(IPCB(skb)->opt);
1502 1501
1503 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); 1502 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1504 1503
@@ -1535,22 +1534,34 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1535 } 1534 }
1536#endif 1535#endif
1537 1536
1538 if (vif->flags&VIFF_TUNNEL) { 1537 if (vif->flags & VIFF_TUNNEL) {
1539 struct flowi fl = { .oif = vif->link, 1538 struct flowi fl = {
1540 .nl_u = { .ip4_u = 1539 .oif = vif->link,
1541 { .daddr = vif->remote, 1540 .nl_u = {
1542 .saddr = vif->local, 1541 .ip4_u = {
1543 .tos = RT_TOS(iph->tos) } }, 1542 .daddr = vif->remote,
1544 .proto = IPPROTO_IPIP }; 1543 .saddr = vif->local,
1544 .tos = RT_TOS(iph->tos)
1545 }
1546 },
1547 .proto = IPPROTO_IPIP
1548 };
1549
1545 if (ip_route_output_key(net, &rt, &fl)) 1550 if (ip_route_output_key(net, &rt, &fl))
1546 goto out_free; 1551 goto out_free;
1547 encap = sizeof(struct iphdr); 1552 encap = sizeof(struct iphdr);
1548 } else { 1553 } else {
1549 struct flowi fl = { .oif = vif->link, 1554 struct flowi fl = {
1550 .nl_u = { .ip4_u = 1555 .oif = vif->link,
1551 { .daddr = iph->daddr, 1556 .nl_u = {
1552 .tos = RT_TOS(iph->tos) } }, 1557 .ip4_u = {
1553 .proto = IPPROTO_IPIP }; 1558 .daddr = iph->daddr,
1559 .tos = RT_TOS(iph->tos)
1560 }
1561 },
1562 .proto = IPPROTO_IPIP
1563 };
1564
1554 if (ip_route_output_key(net, &rt, &fl)) 1565 if (ip_route_output_key(net, &rt, &fl))
1555 goto out_free; 1566 goto out_free;
1556 } 1567 }
@@ -1559,8 +1570,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1559 1570
1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1571 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1561 /* Do not fragment multicasts. Alas, IPv4 does not 1572 /* Do not fragment multicasts. Alas, IPv4 does not
1562 allow to send ICMP, so that packets will disappear 1573 * allow sending ICMP, so such packets disappear
1563 to blackhole. 1574 * into a blackhole.
1564 */ 1575 */
1565 1576
1566 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 1577 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -1583,7 +1594,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1583 ip_decrease_ttl(ip_hdr(skb)); 1594 ip_decrease_ttl(ip_hdr(skb));
1584 1595
1585 /* FIXME: forward and output firewalls used to be called here. 1596 /* FIXME: forward and output firewalls used to be called here.
1586 * What do we do with netfilter? -- RR */ 1597 * What do we do with netfilter? -- RR
1598 */
1587 if (vif->flags & VIFF_TUNNEL) { 1599 if (vif->flags & VIFF_TUNNEL) {
1588 ip_encap(skb, vif->local, vif->remote); 1600 ip_encap(skb, vif->local, vif->remote);
1589 /* FIXME: extra output firewall step used to be here. --RR */ 1601 /* FIXME: extra output firewall step used to be here. --RR */
@@ -1644,15 +1656,15 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1644 1656
1645 if (skb_rtable(skb)->fl.iif == 0) { 1657 if (skb_rtable(skb)->fl.iif == 0) {
1646 /* It is our own packet, looped back. 1658 /* It is our own packet, looped back.
1647 Very complicated situation... 1659 * Very complicated situation...
1648 1660 *
1649 The best workaround until routing daemons will be 1661 * The best workaround until routing daemons are
1650 fixed is not to redistribute packet, if it was 1662 * fixed is not to redistribute a packet if it was
1651 send through wrong interface. It means, that 1663 * sent through the wrong interface. This means that
1652 multicast applications WILL NOT work for 1664 * multicast applications WILL NOT work for
1653 (S,G), which have default multicast route pointing 1665 * (S,G) entries whose default multicast route points
1654 to wrong oif. In any case, it is not a good 1666 * to the wrong oif. In any case, it is not a good
1655 idea to use multicasting applications on router. 1667 * idea to run multicast applications on a router.
1656 */ 1668 */
1657 goto dont_forward; 1669 goto dont_forward;
1658 } 1670 }
@@ -1662,9 +1674,9 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1662 1674
1663 if (true_vifi >= 0 && mrt->mroute_do_assert && 1675 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1664 /* pimsm uses asserts, when switching from RPT to SPT, 1676 /* pimsm uses asserts when switching from RPT to SPT,
1665 so that we cannot check that packet arrived on an oif. 1677 * so we cannot check that the packet arrived on an oif.
1666 It is bad, but otherwise we would need to move pretty 1678 * That is bad, but otherwise we would need to move a pretty
1667 large chunk of pimd to kernel. Ough... --ANK 1679 * large chunk of pimd into the kernel. Ough... --ANK
1668 */ 1680 */
1669 (mrt->mroute_do_pim || 1681 (mrt->mroute_do_pim ||
1670 cache->mfc_un.res.ttls[true_vifi] < 255) && 1682 cache->mfc_un.res.ttls[true_vifi] < 255) &&
@@ -1682,10 +1694,12 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1682 /* 1694 /*
1683 * Forward the frame 1695 * Forward the frame
1684 */ 1696 */
1685 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 1697 for (ct = cache->mfc_un.res.maxvif - 1;
1698 ct >= cache->mfc_un.res.minvif; ct--) {
1686 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 1699 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1687 if (psend != -1) { 1700 if (psend != -1) {
1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1701 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1702
1689 if (skb2) 1703 if (skb2)
1690 ipmr_queue_xmit(net, mrt, skb2, cache, 1704 ipmr_queue_xmit(net, mrt, skb2, cache,
1691 psend); 1705 psend);
@@ -1696,6 +1710,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1696 if (psend != -1) { 1710 if (psend != -1) {
1697 if (local) { 1711 if (local) {
1698 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1712 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1713
1699 if (skb2) 1714 if (skb2)
1700 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1715 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1701 } else { 1716 } else {
@@ -1713,6 +1728,7 @@ dont_forward:
1713 1728
1714/* 1729/*
1715 * Multicast packets for forwarding arrive here 1730 * Multicast packets for forwarding arrive here
1731 * Called with rcu_read_lock() held
1716 */ 1732 */
1717 1733
1718int ip_mr_input(struct sk_buff *skb) 1734int ip_mr_input(struct sk_buff *skb)
@@ -1724,9 +1740,9 @@ int ip_mr_input(struct sk_buff *skb)
1724 int err; 1740 int err;
1725 1741
1726 /* Packet is looped back after forward, it should not be 1742 /* A packet looped back after forwarding should not be
1727 forwarded second time, but still can be delivered locally. 1743 * forwarded a second time, but it can still be delivered locally.
1728 */ 1744 */
1729 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1745 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1730 goto dont_forward; 1746 goto dont_forward;
1731 1747
1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1748 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
@@ -1736,28 +1752,28 @@ int ip_mr_input(struct sk_buff *skb)
1736 } 1752 }
1737 1753
1738 if (!local) { 1754 if (!local) {
1739 if (IPCB(skb)->opt.router_alert) { 1755 if (IPCB(skb)->opt.router_alert) {
1740 if (ip_call_ra_chain(skb)) 1756 if (ip_call_ra_chain(skb))
1741 return 0; 1757 return 0;
1742 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){ 1758 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1743 /* IGMPv1 (and broken IGMPv2 implementations sort of 1759 /* IGMPv1 (and broken IGMPv2 implementations such as
1744 Cisco IOS <= 11.2(8)) do not put router alert 1760 * Cisco IOS <= 11.2(8)) do not put the router alert
1745 option to IGMP packets destined to routable 1761 * option in IGMP packets destined to routable
1746 groups. It is very bad, because it means 1762 * groups. This is very bad, because it means
1747 that we can forward NO IGMP messages. 1763 * that we can forward NO IGMP messages.
1748 */ 1764 */
1749 read_lock(&mrt_lock); 1765 struct sock *mroute_sk;
1750 if (mrt->mroute_sk) { 1766
1751 nf_reset(skb); 1767 mroute_sk = rcu_dereference(mrt->mroute_sk);
1752 raw_rcv(mrt->mroute_sk, skb); 1768 if (mroute_sk) {
1753 read_unlock(&mrt_lock); 1769 nf_reset(skb);
1754 return 0; 1770 raw_rcv(mroute_sk, skb);
1755 } 1771 return 0;
1756 read_unlock(&mrt_lock); 1772 }
1757 } 1773 }
1758 } 1774 }
1759 1775
1760 read_lock(&mrt_lock); 1776 /* already under rcu_read_lock() */
1761 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1777 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1762 1778
1763 /* 1779 /*
@@ -1769,13 +1785,12 @@ int ip_mr_input(struct sk_buff *skb)
1769 if (local) { 1785 if (local) {
1770 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1786 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1771 ip_local_deliver(skb); 1787 ip_local_deliver(skb);
1772 if (skb2 == NULL) { 1788 if (skb2 == NULL)
1773 read_unlock(&mrt_lock);
1774 return -ENOBUFS; 1789 return -ENOBUFS;
1775 }
1776 skb = skb2; 1790 skb = skb2;
1777 } 1791 }
1778 1792
1793 read_lock(&mrt_lock);
1779 vif = ipmr_find_vif(mrt, skb->dev); 1794 vif = ipmr_find_vif(mrt, skb->dev);
1780 if (vif >= 0) { 1795 if (vif >= 0) {
1781 int err2 = ipmr_cache_unresolved(mrt, vif, skb); 1796 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
@@ -1788,8 +1803,8 @@ int ip_mr_input(struct sk_buff *skb)
1788 return -ENODEV; 1803 return -ENODEV;
1789 } 1804 }
1790 1805
1806 read_lock(&mrt_lock);
1791 ip_mr_forward(net, mrt, skb, cache, local); 1807 ip_mr_forward(net, mrt, skb, cache, local);
1792
1793 read_unlock(&mrt_lock); 1808 read_unlock(&mrt_lock);
1794 1809
1795 if (local) 1810 if (local)
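
Condensing the control flow that ip_mr_input() ends up with after these hunks (an assumption drawn from the diff, not a verbatim excerpt): the MFC cache lookup now relies purely on the rcu_read_lock() already held by the caller, while the vif table and the forwarding path still take the mrt_lock rwlock nested inside the RCU section.

/* Sketch only: ipmr_cache_find(), ip_mr_forward() and mrt_lock are the
 * real symbols from this file; the wrapper itself is hypothetical.
 */
static void ipmr_lookup_and_forward(struct net *net, struct mr_table *mrt,
				    struct sk_buff *skb, int local)
{
	struct mfc_cache *cache;

	/* caller (the ip_rcv path) already holds rcu_read_lock() */
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
	if (!cache)
		return;

	read_lock(&mrt_lock);	/* vif table is still rwlock-protected */
	ip_mr_forward(net, mrt, skb, cache, local);
	read_unlock(&mrt_lock);
}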
@@ -1805,6 +1820,7 @@ dont_forward:
1805} 1820}
1806 1821
1807#ifdef CONFIG_IP_PIMSM 1822#ifdef CONFIG_IP_PIMSM
1823/* called with rcu_read_lock() */
1808static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 1824static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1809 unsigned int pimlen) 1825 unsigned int pimlen)
1810{ 1826{
@@ -1813,10 +1829,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1813 1829
1814 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1830 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1815 /* 1831 /*
1816 Check that: 1832 * Check that:
1817 a. packet is really destinted to a multicast group 1833 * a. packet is really sent to a multicast group
1818 b. packet is not a NULL-REGISTER 1834 * b. packet is not a NULL-REGISTER
1819 c. packet is not truncated 1835 * c. packet is not truncated
1820 */ 1836 */
1821 if (!ipv4_is_multicast(encap->daddr) || 1837 if (!ipv4_is_multicast(encap->daddr) ||
1822 encap->tot_len == 0 || 1838 encap->tot_len == 0 ||
@@ -1826,26 +1842,23 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1826 read_lock(&mrt_lock); 1842 read_lock(&mrt_lock);
1827 if (mrt->mroute_reg_vif_num >= 0) 1843 if (mrt->mroute_reg_vif_num >= 0)
1828 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 1844 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1829 if (reg_dev)
1830 dev_hold(reg_dev);
1831 read_unlock(&mrt_lock); 1845 read_unlock(&mrt_lock);
1832 1846
1833 if (reg_dev == NULL) 1847 if (reg_dev == NULL)
1834 return 1; 1848 return 1;
1835 1849
1836 skb->mac_header = skb->network_header; 1850 skb->mac_header = skb->network_header;
1837 skb_pull(skb, (u8*)encap - skb->data); 1851 skb_pull(skb, (u8 *)encap - skb->data);
1838 skb_reset_network_header(skb); 1852 skb_reset_network_header(skb);
1839 skb->protocol = htons(ETH_P_IP); 1853 skb->protocol = htons(ETH_P_IP);
1840 skb->ip_summed = 0; 1854 skb->ip_summed = CHECKSUM_NONE;
1841 skb->pkt_type = PACKET_HOST; 1855 skb->pkt_type = PACKET_HOST;
1842 1856
1843 skb_tunnel_rx(skb, reg_dev); 1857 skb_tunnel_rx(skb, reg_dev);
1844 1858
1845 netif_rx(skb); 1859 netif_rx(skb);
1846 dev_put(reg_dev);
1847 1860
1848 return 0; 1861 return NET_RX_SUCCESS;
1849} 1862}
1850#endif 1863#endif
1851 1864
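
Worth noting in the hunk above: the dev_hold()/dev_put() pair around reg_dev disappears. With the caller holding rcu_read_lock(), a device reached through the vif table cannot be freed before the read-side critical section ends, so pinning it is no longer needed. Condensed into a hypothetical wrapper over the real field names:

static struct net_device *pim_reg_dev(struct mr_table *mrt)
{
	struct net_device *reg_dev = NULL;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	read_unlock(&mrt_lock);

	/* valid until the caller's rcu_read_unlock(); no dev_hold() needed */
	return reg_dev;
}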
@@ -1854,7 +1867,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1854 * Handle IGMP messages of PIMv1 1867 * Handle IGMP messages of PIMv1
1855 */ 1868 */
1856 1869
1857int pim_rcv_v1(struct sk_buff * skb) 1870int pim_rcv_v1(struct sk_buff *skb)
1858{ 1871{
1859 struct igmphdr *pim; 1872 struct igmphdr *pim;
1860 struct net *net = dev_net(skb->dev); 1873 struct net *net = dev_net(skb->dev);
@@ -1881,7 +1894,7 @@ drop:
1881#endif 1894#endif
1882 1895
1883#ifdef CONFIG_IP_PIMSM_V2 1896#ifdef CONFIG_IP_PIMSM_V2
1884static int pim_rcv(struct sk_buff * skb) 1897static int pim_rcv(struct sk_buff *skb)
1885{ 1898{
1886 struct pimreghdr *pim; 1899 struct pimreghdr *pim;
1887 struct net *net = dev_net(skb->dev); 1900 struct net *net = dev_net(skb->dev);
@@ -1891,8 +1904,8 @@ static int pim_rcv(struct sk_buff * skb)
1891 goto drop; 1904 goto drop;
1892 1905
1893 pim = (struct pimreghdr *)skb_transport_header(skb); 1906 pim = (struct pimreghdr *)skb_transport_header(skb);
1894 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1907 if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
1895 (pim->flags&PIM_NULL_REGISTER) || 1908 (pim->flags & PIM_NULL_REGISTER) ||
1896 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1909 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1897 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1910 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1898 goto drop; 1911 goto drop;
@@ -1958,28 +1971,33 @@ int ipmr_get_route(struct net *net,
1958 if (mrt == NULL) 1971 if (mrt == NULL)
1959 return -ENOENT; 1972 return -ENOENT;
1960 1973
1961 read_lock(&mrt_lock); 1974 rcu_read_lock();
1962 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 1975 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1963 1976
1964 if (cache == NULL) { 1977 if (cache == NULL) {
1965 struct sk_buff *skb2; 1978 struct sk_buff *skb2;
1966 struct iphdr *iph; 1979 struct iphdr *iph;
1967 struct net_device *dev; 1980 struct net_device *dev;
1968 int vif; 1981 int vif = -1;
1969 1982
1970 if (nowait) { 1983 if (nowait) {
1971 read_unlock(&mrt_lock); 1984 rcu_read_unlock();
1972 return -EAGAIN; 1985 return -EAGAIN;
1973 } 1986 }
1974 1987
1975 dev = skb->dev; 1988 dev = skb->dev;
1976 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { 1989 read_lock(&mrt_lock);
1990 if (dev)
1991 vif = ipmr_find_vif(mrt, dev);
1992 if (vif < 0) {
1977 read_unlock(&mrt_lock); 1993 read_unlock(&mrt_lock);
1994 rcu_read_unlock();
1978 return -ENODEV; 1995 return -ENODEV;
1979 } 1996 }
1980 skb2 = skb_clone(skb, GFP_ATOMIC); 1997 skb2 = skb_clone(skb, GFP_ATOMIC);
1981 if (!skb2) { 1998 if (!skb2) {
1982 read_unlock(&mrt_lock); 1999 read_unlock(&mrt_lock);
2000 rcu_read_unlock();
1983 return -ENOMEM; 2001 return -ENOMEM;
1984 } 2002 }
1985 2003
@@ -1992,13 +2010,16 @@ int ipmr_get_route(struct net *net,
1992 iph->version = 0; 2010 iph->version = 0;
1993 err = ipmr_cache_unresolved(mrt, vif, skb2); 2011 err = ipmr_cache_unresolved(mrt, vif, skb2);
1994 read_unlock(&mrt_lock); 2012 read_unlock(&mrt_lock);
2013 rcu_read_unlock();
1995 return err; 2014 return err;
1996 } 2015 }
1997 2016
1998 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 2017 read_lock(&mrt_lock);
2018 if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1999 cache->mfc_flags |= MFC_NOTIFY; 2019 cache->mfc_flags |= MFC_NOTIFY;
2000 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2020 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
2001 read_unlock(&mrt_lock); 2021 read_unlock(&mrt_lock);
2022 rcu_read_unlock();
2002 return err; 2023 return err;
2003} 2024}
2004 2025
@@ -2050,14 +2071,14 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2050 s_h = cb->args[1]; 2071 s_h = cb->args[1];
2051 s_e = cb->args[2]; 2072 s_e = cb->args[2];
2052 2073
2053 read_lock(&mrt_lock); 2074 rcu_read_lock();
2054 ipmr_for_each_table(mrt, net) { 2075 ipmr_for_each_table(mrt, net) {
2055 if (t < s_t) 2076 if (t < s_t)
2056 goto next_table; 2077 goto next_table;
2057 if (t > s_t) 2078 if (t > s_t)
2058 s_h = 0; 2079 s_h = 0;
2059 for (h = s_h; h < MFC_LINES; h++) { 2080 for (h = s_h; h < MFC_LINES; h++) {
2060 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { 2081 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
2061 if (e < s_e) 2082 if (e < s_e)
2062 goto next_entry; 2083 goto next_entry;
2063 if (ipmr_fill_mroute(mrt, skb, 2084 if (ipmr_fill_mroute(mrt, skb,
@@ -2075,7 +2096,7 @@ next_table:
2075 t++; 2096 t++;
2076 } 2097 }
2077done: 2098done:
2078 read_unlock(&mrt_lock); 2099 rcu_read_unlock();
2079 2100
2080 cb->args[2] = e; 2101 cb->args[2] = e;
2081 cb->args[1] = h; 2102 cb->args[1] = h;
@@ -2086,7 +2107,8 @@ done:
2086 2107
2087#ifdef CONFIG_PROC_FS 2108#ifdef CONFIG_PROC_FS
2088/* 2109/*
2089 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 2110 * The /proc interfaces to multicast routing:
2111 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
2090 */ 2112 */
2091struct ipmr_vif_iter { 2113struct ipmr_vif_iter {
2092 struct seq_net_private p; 2114 struct seq_net_private p;
@@ -2208,14 +2230,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2208 struct mr_table *mrt = it->mrt; 2230 struct mr_table *mrt = it->mrt;
2209 struct mfc_cache *mfc; 2231 struct mfc_cache *mfc;
2210 2232
2211 read_lock(&mrt_lock); 2233 rcu_read_lock();
2212 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { 2234 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2213 it->cache = &mrt->mfc_cache_array[it->ct]; 2235 it->cache = &mrt->mfc_cache_array[it->ct];
2214 list_for_each_entry(mfc, it->cache, list) 2236 list_for_each_entry_rcu(mfc, it->cache, list)
2215 if (pos-- == 0) 2237 if (pos-- == 0)
2216 return mfc; 2238 return mfc;
2217 } 2239 }
2218 read_unlock(&mrt_lock); 2240 rcu_read_unlock();
2219 2241
2220 spin_lock_bh(&mfc_unres_lock); 2242 spin_lock_bh(&mfc_unres_lock);
2221 it->cache = &mrt->mfc_unres_queue; 2243 it->cache = &mrt->mfc_unres_queue;
@@ -2274,7 +2296,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2274 } 2296 }
2275 2297
2276 /* exhausted cache_array, show unresolved */ 2298 /* exhausted cache_array, show unresolved */
2277 read_unlock(&mrt_lock); 2299 rcu_read_unlock();
2278 it->cache = &mrt->mfc_unres_queue; 2300 it->cache = &mrt->mfc_unres_queue;
2279 it->ct = 0; 2301 it->ct = 0;
2280 2302
@@ -2282,7 +2304,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2282 if (!list_empty(it->cache)) 2304 if (!list_empty(it->cache))
2283 return list_first_entry(it->cache, struct mfc_cache, list); 2305 return list_first_entry(it->cache, struct mfc_cache, list);
2284 2306
2285 end_of_list: 2307end_of_list:
2286 spin_unlock_bh(&mfc_unres_lock); 2308 spin_unlock_bh(&mfc_unres_lock);
2287 it->cache = NULL; 2309 it->cache = NULL;
2288 2310
@@ -2297,7 +2319,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2297 if (it->cache == &mrt->mfc_unres_queue) 2319 if (it->cache == &mrt->mfc_unres_queue)
2298 spin_unlock_bh(&mfc_unres_lock); 2320 spin_unlock_bh(&mfc_unres_lock);
2299 else if (it->cache == &mrt->mfc_cache_array[it->ct]) 2321 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2300 read_unlock(&mrt_lock); 2322 rcu_read_unlock();
2301} 2323}
2302 2324
2303static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2325static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
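
The seq_file hunks follow the usual convention of holding rcu_read_lock() from ->start to ->stop so the RCU-protected list stays stable for the whole /proc dump without taking mrt_lock. A minimal sketch (simplified: the real iterator also juggles the mfc_unres_lock spinlock for the unresolved queue, and mfc_seq_first() below is a hypothetical positioning helper):

#include <linux/rcupdate.h>
#include <linux/seq_file.h>

static void *mfc_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return mfc_seq_first(seq, *pos);	/* hypothetical helper */
}

static void mfc_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}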
@@ -2323,7 +2345,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2323 mfc->mfc_un.res.bytes, 2345 mfc->mfc_un.res.bytes,
2324 mfc->mfc_un.res.wrong_if); 2346 mfc->mfc_un.res.wrong_if);
2325 for (n = mfc->mfc_un.res.minvif; 2347 for (n = mfc->mfc_un.res.minvif;
2326 n < mfc->mfc_un.res.maxvif; n++ ) { 2348 n < mfc->mfc_un.res.maxvif; n++) {
2327 if (VIF_EXISTS(mrt, n) && 2349 if (VIF_EXISTS(mrt, n) &&
2328 mfc->mfc_un.res.ttls[n] < 255) 2350 mfc->mfc_un.res.ttls[n] < 255)
2329 seq_printf(seq, 2351 seq_printf(seq,
@@ -2421,7 +2443,7 @@ int __init ip_mr_init(void)
2421 2443
2422 mrt_cachep = kmem_cache_create("ip_mrt_cache", 2444 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2423 sizeof(struct mfc_cache), 2445 sizeof(struct mfc_cache),
2424 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2446 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
2425 NULL); 2447 NULL);
2426 if (!mrt_cachep) 2448 if (!mrt_cachep)
2427 return -ENOMEM; 2449 return -ENOMEM;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1833bdbf9805..babd1a2bae5f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -147,7 +147,7 @@ config IP_NF_TARGET_ULOG
147 which can only be viewed through syslog. 147 which can only be viewed through syslog.
148 148
149 The appropriate userspace logging daemon (ulogd) may be obtained from 149 The appropriate userspace logging daemon (ulogd) may be obtained from
150 <http://www.gnumonks.org/projects/ulogd/> 150 <http://www.netfilter.org/projects/ulogd/index.html>
151 151
152 To compile it as a module, choose M here. If unsure, say N. 152 To compile it as a module, choose M here. If unsure, say N.
153 153
@@ -324,10 +324,10 @@ config IP_NF_TARGET_ECN
324 324
325config IP_NF_TARGET_TTL 325config IP_NF_TARGET_TTL
326 tristate '"TTL" target support' 326 tristate '"TTL" target support'
327 depends on NETFILTER_ADVANCED 327 depends on NETFILTER_ADVANCED && IP_NF_MANGLE
328 select NETFILTER_XT_TARGET_HL 328 select NETFILTER_XT_TARGET_HL
329 ---help--- 329 ---help---
330 This is a backwards-compat option for the user's convenience 330 This is a backwards-compatible option for the user's convenience
331 (e.g. when running oldconfig). It selects 331 (e.g. when running oldconfig). It selects
332 CONFIG_NETFILTER_XT_TARGET_HL. 332 CONFIG_NETFILTER_XT_TARGET_HL.
333 333
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8f4f9a57f12..3fac340a28d5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -72,7 +72,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
72 for (i = 0; i < len; i++) 72 for (i = 0; i < len; i++)
73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; 73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
74 74
75 return (ret != 0); 75 return ret != 0;
76} 76}
77 77
78/* 78/*
@@ -228,7 +228,7 @@ arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
228 return NF_DROP; 228 return NF_DROP;
229} 229}
230 230
231static inline const struct arpt_entry_target * 231static inline const struct xt_entry_target *
232arpt_get_target_c(const struct arpt_entry *e) 232arpt_get_target_c(const struct arpt_entry *e)
233{ 233{
234 return arpt_get_target((struct arpt_entry *)e); 234 return arpt_get_target((struct arpt_entry *)e);
@@ -282,7 +282,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
282 282
283 arp = arp_hdr(skb); 283 arp = arp_hdr(skb);
284 do { 284 do {
285 const struct arpt_entry_target *t; 285 const struct xt_entry_target *t;
286 286
287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
288 e = arpt_next_entry(e); 288 e = arpt_next_entry(e);
@@ -297,10 +297,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
297 if (!t->u.kernel.target->target) { 297 if (!t->u.kernel.target->target) {
298 int v; 298 int v;
299 299
300 v = ((struct arpt_standard_target *)t)->verdict; 300 v = ((struct xt_standard_target *)t)->verdict;
301 if (v < 0) { 301 if (v < 0) {
302 /* Pop from stack? */ 302 /* Pop from stack? */
303 if (v != ARPT_RETURN) { 303 if (v != XT_RETURN) {
304 verdict = (unsigned)(-v) - 1; 304 verdict = (unsigned)(-v) - 1;
305 break; 305 break;
306 } 306 }
@@ -332,7 +332,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
332 /* Target might have changed stuff. */ 332 /* Target might have changed stuff. */
333 arp = arp_hdr(skb); 333 arp = arp_hdr(skb);
334 334
335 if (verdict == ARPT_CONTINUE) 335 if (verdict == XT_CONTINUE)
336 e = arpt_next_entry(e); 336 e = arpt_next_entry(e);
337 else 337 else
338 /* Verdict */ 338 /* Verdict */
@@ -377,7 +377,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
377 e->counters.pcnt = pos; 377 e->counters.pcnt = pos;
378 378
379 for (;;) { 379 for (;;) {
380 const struct arpt_standard_target *t 380 const struct xt_standard_target *t
381 = (void *)arpt_get_target_c(e); 381 = (void *)arpt_get_target_c(e);
382 int visited = e->comefrom & (1 << hook); 382 int visited = e->comefrom & (1 << hook);
383 383
@@ -392,13 +392,13 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
392 /* Unconditional return/END. */ 392 /* Unconditional return/END. */
393 if ((e->target_offset == sizeof(struct arpt_entry) && 393 if ((e->target_offset == sizeof(struct arpt_entry) &&
394 (strcmp(t->target.u.user.name, 394 (strcmp(t->target.u.user.name,
395 ARPT_STANDARD_TARGET) == 0) && 395 XT_STANDARD_TARGET) == 0) &&
396 t->verdict < 0 && unconditional(&e->arp)) || 396 t->verdict < 0 && unconditional(&e->arp)) ||
397 visited) { 397 visited) {
398 unsigned int oldpos, size; 398 unsigned int oldpos, size;
399 399
400 if ((strcmp(t->target.u.user.name, 400 if ((strcmp(t->target.u.user.name,
401 ARPT_STANDARD_TARGET) == 0) && 401 XT_STANDARD_TARGET) == 0) &&
402 t->verdict < -NF_MAX_VERDICT - 1) { 402 t->verdict < -NF_MAX_VERDICT - 1) {
403 duprintf("mark_source_chains: bad " 403 duprintf("mark_source_chains: bad "
404 "negative verdict (%i)\n", 404 "negative verdict (%i)\n",
@@ -433,7 +433,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
433 int newpos = t->verdict; 433 int newpos = t->verdict;
434 434
435 if (strcmp(t->target.u.user.name, 435 if (strcmp(t->target.u.user.name,
436 ARPT_STANDARD_TARGET) == 0 && 436 XT_STANDARD_TARGET) == 0 &&
437 newpos >= 0) { 437 newpos >= 0) {
438 if (newpos > newinfo->size - 438 if (newpos > newinfo->size -
439 sizeof(struct arpt_entry)) { 439 sizeof(struct arpt_entry)) {
@@ -464,14 +464,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
464 464
465static inline int check_entry(const struct arpt_entry *e, const char *name) 465static inline int check_entry(const struct arpt_entry *e, const char *name)
466{ 466{
467 const struct arpt_entry_target *t; 467 const struct xt_entry_target *t;
468 468
469 if (!arp_checkentry(&e->arp)) { 469 if (!arp_checkentry(&e->arp)) {
470 duprintf("arp_tables: arp check failed %p %s.\n", e, name); 470 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
471 return -EINVAL; 471 return -EINVAL;
472 } 472 }
473 473
474 if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) 474 if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
475 return -EINVAL; 475 return -EINVAL;
476 476
477 t = arpt_get_target_c(e); 477 t = arpt_get_target_c(e);
@@ -483,7 +483,7 @@ static inline int check_entry(const struct arpt_entry *e, const char *name)
483 483
484static inline int check_target(struct arpt_entry *e, const char *name) 484static inline int check_target(struct arpt_entry *e, const char *name)
485{ 485{
486 struct arpt_entry_target *t = arpt_get_target(e); 486 struct xt_entry_target *t = arpt_get_target(e);
487 int ret; 487 int ret;
488 struct xt_tgchk_param par = { 488 struct xt_tgchk_param par = {
489 .table = name, 489 .table = name,
@@ -506,7 +506,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
506static inline int 506static inline int
507find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) 507find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
508{ 508{
509 struct arpt_entry_target *t; 509 struct xt_entry_target *t;
510 struct xt_target *target; 510 struct xt_target *target;
511 int ret; 511 int ret;
512 512
@@ -536,7 +536,7 @@ out:
536 536
537static bool check_underflow(const struct arpt_entry *e) 537static bool check_underflow(const struct arpt_entry *e)
538{ 538{
539 const struct arpt_entry_target *t; 539 const struct xt_entry_target *t;
540 unsigned int verdict; 540 unsigned int verdict;
541 541
542 if (!unconditional(&e->arp)) 542 if (!unconditional(&e->arp))
@@ -544,7 +544,7 @@ static bool check_underflow(const struct arpt_entry *e)
544 t = arpt_get_target_c(e); 544 t = arpt_get_target_c(e);
545 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 545 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
546 return false; 546 return false;
547 verdict = ((struct arpt_standard_target *)t)->verdict; 547 verdict = ((struct xt_standard_target *)t)->verdict;
548 verdict = -verdict - 1; 548 verdict = -verdict - 1;
549 return verdict == NF_DROP || verdict == NF_ACCEPT; 549 return verdict == NF_DROP || verdict == NF_ACCEPT;
550} 550}
@@ -566,7 +566,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
566 } 566 }
567 567
568 if (e->next_offset 568 if (e->next_offset
569 < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) { 569 < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
570 duprintf("checking: element %p size %u\n", 570 duprintf("checking: element %p size %u\n",
571 e, e->next_offset); 571 e, e->next_offset);
572 return -EINVAL; 572 return -EINVAL;
@@ -598,7 +598,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
598static inline void cleanup_entry(struct arpt_entry *e) 598static inline void cleanup_entry(struct arpt_entry *e)
599{ 599{
600 struct xt_tgdtor_param par; 600 struct xt_tgdtor_param par;
601 struct arpt_entry_target *t; 601 struct xt_entry_target *t;
602 602
603 t = arpt_get_target(e); 603 t = arpt_get_target(e);
604 par.target = t->u.kernel.target; 604 par.target = t->u.kernel.target;
@@ -794,7 +794,7 @@ static int copy_entries_to_user(unsigned int total_size,
794 /* FIXME: use iterator macros --RR */ 794 /* FIXME: use iterator macros --RR */
795 /* ... then go back and fix counters and names */ 795 /* ... then go back and fix counters and names */
796 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 796 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
797 const struct arpt_entry_target *t; 797 const struct xt_entry_target *t;
798 798
799 e = (struct arpt_entry *)(loc_cpu_entry + off); 799 e = (struct arpt_entry *)(loc_cpu_entry + off);
800 if (copy_to_user(userptr + off 800 if (copy_to_user(userptr + off
@@ -807,7 +807,7 @@ static int copy_entries_to_user(unsigned int total_size,
807 807
808 t = arpt_get_target_c(e); 808 t = arpt_get_target_c(e);
809 if (copy_to_user(userptr + off + e->target_offset 809 if (copy_to_user(userptr + off + e->target_offset
810 + offsetof(struct arpt_entry_target, 810 + offsetof(struct xt_entry_target,
811 u.user.name), 811 u.user.name),
812 t->u.kernel.target->name, 812 t->u.kernel.target->name,
813 strlen(t->u.kernel.target->name)+1) != 0) { 813 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -844,7 +844,7 @@ static int compat_calc_entry(const struct arpt_entry *e,
844 const struct xt_table_info *info, 844 const struct xt_table_info *info,
845 const void *base, struct xt_table_info *newinfo) 845 const void *base, struct xt_table_info *newinfo)
846{ 846{
847 const struct arpt_entry_target *t; 847 const struct xt_entry_target *t;
848 unsigned int entry_offset; 848 unsigned int entry_offset;
849 int off, i, ret; 849 int off, i, ret;
850 850
@@ -895,7 +895,7 @@ static int compat_table_info(const struct xt_table_info *info,
895static int get_info(struct net *net, void __user *user, 895static int get_info(struct net *net, void __user *user,
896 const int *len, int compat) 896 const int *len, int compat)
897{ 897{
898 char name[ARPT_TABLE_MAXNAMELEN]; 898 char name[XT_TABLE_MAXNAMELEN];
899 struct xt_table *t; 899 struct xt_table *t;
900 int ret; 900 int ret;
901 901
@@ -908,7 +908,7 @@ static int get_info(struct net *net, void __user *user,
908 if (copy_from_user(name, user, sizeof(name)) != 0) 908 if (copy_from_user(name, user, sizeof(name)) != 0)
909 return -EFAULT; 909 return -EFAULT;
910 910
911 name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; 911 name[XT_TABLE_MAXNAMELEN-1] = '\0';
912#ifdef CONFIG_COMPAT 912#ifdef CONFIG_COMPAT
913 if (compat) 913 if (compat)
914 xt_compat_lock(NFPROTO_ARP); 914 xt_compat_lock(NFPROTO_ARP);
@@ -927,6 +927,7 @@ static int get_info(struct net *net, void __user *user,
927 private = &tmp; 927 private = &tmp;
928 } 928 }
929#endif 929#endif
930 memset(&info, 0, sizeof(info));
930 info.valid_hooks = t->valid_hooks; 931 info.valid_hooks = t->valid_hooks;
931 memcpy(info.hook_entry, private->hook_entry, 932 memcpy(info.hook_entry, private->hook_entry,
932 sizeof(info.hook_entry)); 933 sizeof(info.hook_entry));
@@ -1204,7 +1205,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1204#ifdef CONFIG_COMPAT 1205#ifdef CONFIG_COMPAT
1205static inline void compat_release_entry(struct compat_arpt_entry *e) 1206static inline void compat_release_entry(struct compat_arpt_entry *e)
1206{ 1207{
1207 struct arpt_entry_target *t; 1208 struct xt_entry_target *t;
1208 1209
1209 t = compat_arpt_get_target(e); 1210 t = compat_arpt_get_target(e);
1210 module_put(t->u.kernel.target->me); 1211 module_put(t->u.kernel.target->me);
@@ -1220,7 +1221,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1220 const unsigned int *underflows, 1221 const unsigned int *underflows,
1221 const char *name) 1222 const char *name)
1222{ 1223{
1223 struct arpt_entry_target *t; 1224 struct xt_entry_target *t;
1224 struct xt_target *target; 1225 struct xt_target *target;
1225 unsigned int entry_offset; 1226 unsigned int entry_offset;
1226 int ret, off, h; 1227 int ret, off, h;
@@ -1288,7 +1289,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
1288 unsigned int *size, const char *name, 1289 unsigned int *size, const char *name,
1289 struct xt_table_info *newinfo, unsigned char *base) 1290 struct xt_table_info *newinfo, unsigned char *base)
1290{ 1291{
1291 struct arpt_entry_target *t; 1292 struct xt_entry_target *t;
1292 struct xt_target *target; 1293 struct xt_target *target;
1293 struct arpt_entry *de; 1294 struct arpt_entry *de;
1294 unsigned int origsize; 1295 unsigned int origsize;
@@ -1474,7 +1475,7 @@ out_unlock:
1474} 1475}
1475 1476
1476struct compat_arpt_replace { 1477struct compat_arpt_replace {
1477 char name[ARPT_TABLE_MAXNAMELEN]; 1478 char name[XT_TABLE_MAXNAMELEN];
1478 u32 valid_hooks; 1479 u32 valid_hooks;
1479 u32 num_entries; 1480 u32 num_entries;
1480 u32 size; 1481 u32 size;
@@ -1567,7 +1568,7 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1567 struct xt_counters *counters, 1568 struct xt_counters *counters,
1568 unsigned int i) 1569 unsigned int i)
1569{ 1570{
1570 struct arpt_entry_target *t; 1571 struct xt_entry_target *t;
1571 struct compat_arpt_entry __user *ce; 1572 struct compat_arpt_entry __user *ce;
1572 u_int16_t target_offset, next_offset; 1573 u_int16_t target_offset, next_offset;
1573 compat_uint_t origsize; 1574 compat_uint_t origsize;
@@ -1628,7 +1629,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1628} 1629}
1629 1630
1630struct compat_arpt_get_entries { 1631struct compat_arpt_get_entries {
1631 char name[ARPT_TABLE_MAXNAMELEN]; 1632 char name[XT_TABLE_MAXNAMELEN];
1632 compat_uint_t size; 1633 compat_uint_t size;
1633 struct compat_arpt_entry entrytable[0]; 1634 struct compat_arpt_entry entrytable[0];
1634}; 1635};
@@ -1828,7 +1829,7 @@ void arpt_unregister_table(struct xt_table *table)
1828/* The built-in targets: standard (NULL) and error. */ 1829/* The built-in targets: standard (NULL) and error. */
1829static struct xt_target arpt_builtin_tg[] __read_mostly = { 1830static struct xt_target arpt_builtin_tg[] __read_mostly = {
1830 { 1831 {
1831 .name = ARPT_STANDARD_TARGET, 1832 .name = XT_STANDARD_TARGET,
1832 .targetsize = sizeof(int), 1833 .targetsize = sizeof(int),
1833 .family = NFPROTO_ARP, 1834 .family = NFPROTO_ARP,
1834#ifdef CONFIG_COMPAT 1835#ifdef CONFIG_COMPAT
@@ -1838,9 +1839,9 @@ static struct xt_target arpt_builtin_tg[] __read_mostly = {
1838#endif 1839#endif
1839 }, 1840 },
1840 { 1841 {
1841 .name = ARPT_ERROR_TARGET, 1842 .name = XT_ERROR_TARGET,
1842 .target = arpt_error, 1843 .target = arpt_error,
1843 .targetsize = ARPT_FUNCTION_MAXNAMELEN, 1844 .targetsize = XT_FUNCTION_MAXNAMELEN,
1844 .family = NFPROTO_ARP, 1845 .family = NFPROTO_ARP,
1845 }, 1846 },
1846}; 1847};
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index e1be7dd1171b..b8ddcc480ed9 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -63,7 +63,7 @@ static int checkentry(const struct xt_tgchk_param *par)
63 return false; 63 return false;
64 64
65 if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && 65 if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
66 mangle->target != ARPT_CONTINUE) 66 mangle->target != XT_CONTINUE)
67 return false; 67 return false;
68 return true; 68 return true;
69} 69}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d163f2e3b2e9..a846d633b3b6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -186,7 +186,7 @@ static inline bool unconditional(const struct ipt_ip *ip)
186} 186}
187 187
188/* for const-correctness */ 188/* for const-correctness */
189static inline const struct ipt_entry_target * 189static inline const struct xt_entry_target *
190ipt_get_target_c(const struct ipt_entry *e) 190ipt_get_target_c(const struct ipt_entry *e)
191{ 191{
192 return ipt_get_target((struct ipt_entry *)e); 192 return ipt_get_target((struct ipt_entry *)e);
@@ -230,9 +230,9 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
230 const char *hookname, const char **chainname, 230 const char *hookname, const char **chainname,
231 const char **comment, unsigned int *rulenum) 231 const char **comment, unsigned int *rulenum)
232{ 232{
233 const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); 233 const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
234 234
235 if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { 235 if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
236 /* Head of user chain: ERROR target with chainname */ 236 /* Head of user chain: ERROR target with chainname */
237 *chainname = t->target.data; 237 *chainname = t->target.data;
238 (*rulenum) = 0; 238 (*rulenum) = 0;
@@ -241,7 +241,7 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
241 241
242 if (s->target_offset == sizeof(struct ipt_entry) && 242 if (s->target_offset == sizeof(struct ipt_entry) &&
243 strcmp(t->target.u.kernel.target->name, 243 strcmp(t->target.u.kernel.target->name,
244 IPT_STANDARD_TARGET) == 0 && 244 XT_STANDARD_TARGET) == 0 &&
245 t->verdict < 0 && 245 t->verdict < 0 &&
246 unconditional(&s->ip)) { 246 unconditional(&s->ip)) {
247 /* Tail of chains: STANDARD target (return/policy) */ 247 /* Tail of chains: STANDARD target (return/policy) */
@@ -346,7 +346,7 @@ ipt_do_table(struct sk_buff *skb,
346 get_entry(table_base, private->underflow[hook])); 346 get_entry(table_base, private->underflow[hook]));
347 347
348 do { 348 do {
349 const struct ipt_entry_target *t; 349 const struct xt_entry_target *t;
350 const struct xt_entry_match *ematch; 350 const struct xt_entry_match *ematch;
351 351
352 IP_NF_ASSERT(e); 352 IP_NF_ASSERT(e);
@@ -380,10 +380,10 @@ ipt_do_table(struct sk_buff *skb,
380 if (!t->u.kernel.target->target) { 380 if (!t->u.kernel.target->target) {
381 int v; 381 int v;
382 382
383 v = ((struct ipt_standard_target *)t)->verdict; 383 v = ((struct xt_standard_target *)t)->verdict;
384 if (v < 0) { 384 if (v < 0) {
385 /* Pop from stack? */ 385 /* Pop from stack? */
386 if (v != IPT_RETURN) { 386 if (v != XT_RETURN) {
387 verdict = (unsigned)(-v) - 1; 387 verdict = (unsigned)(-v) - 1;
388 break; 388 break;
389 } 389 }
@@ -421,7 +421,7 @@ ipt_do_table(struct sk_buff *skb,
421 verdict = t->u.kernel.target->target(skb, &acpar); 421 verdict = t->u.kernel.target->target(skb, &acpar);
422 /* Target might have changed stuff. */ 422 /* Target might have changed stuff. */
423 ip = ip_hdr(skb); 423 ip = ip_hdr(skb);
424 if (verdict == IPT_CONTINUE) 424 if (verdict == XT_CONTINUE)
425 e = ipt_next_entry(e); 425 e = ipt_next_entry(e);
426 else 426 else
427 /* Verdict */ 427 /* Verdict */
@@ -461,7 +461,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
461 e->counters.pcnt = pos; 461 e->counters.pcnt = pos;
462 462
463 for (;;) { 463 for (;;) {
464 const struct ipt_standard_target *t 464 const struct xt_standard_target *t
465 = (void *)ipt_get_target_c(e); 465 = (void *)ipt_get_target_c(e);
466 int visited = e->comefrom & (1 << hook); 466 int visited = e->comefrom & (1 << hook);
467 467
@@ -475,13 +475,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
475 /* Unconditional return/END. */ 475 /* Unconditional return/END. */
476 if ((e->target_offset == sizeof(struct ipt_entry) && 476 if ((e->target_offset == sizeof(struct ipt_entry) &&
477 (strcmp(t->target.u.user.name, 477 (strcmp(t->target.u.user.name,
478 IPT_STANDARD_TARGET) == 0) && 478 XT_STANDARD_TARGET) == 0) &&
479 t->verdict < 0 && unconditional(&e->ip)) || 479 t->verdict < 0 && unconditional(&e->ip)) ||
480 visited) { 480 visited) {
481 unsigned int oldpos, size; 481 unsigned int oldpos, size;
482 482
483 if ((strcmp(t->target.u.user.name, 483 if ((strcmp(t->target.u.user.name,
484 IPT_STANDARD_TARGET) == 0) && 484 XT_STANDARD_TARGET) == 0) &&
485 t->verdict < -NF_MAX_VERDICT - 1) { 485 t->verdict < -NF_MAX_VERDICT - 1) {
486 duprintf("mark_source_chains: bad " 486 duprintf("mark_source_chains: bad "
487 "negative verdict (%i)\n", 487 "negative verdict (%i)\n",
@@ -524,7 +524,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
524 int newpos = t->verdict; 524 int newpos = t->verdict;
525 525
526 if (strcmp(t->target.u.user.name, 526 if (strcmp(t->target.u.user.name,
527 IPT_STANDARD_TARGET) == 0 && 527 XT_STANDARD_TARGET) == 0 &&
528 newpos >= 0) { 528 newpos >= 0) {
529 if (newpos > newinfo->size - 529 if (newpos > newinfo->size -
530 sizeof(struct ipt_entry)) { 530 sizeof(struct ipt_entry)) {
@@ -552,7 +552,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
552 return 1; 552 return 1;
553} 553}
554 554
555static void cleanup_match(struct ipt_entry_match *m, struct net *net) 555static void cleanup_match(struct xt_entry_match *m, struct net *net)
556{ 556{
557 struct xt_mtdtor_param par; 557 struct xt_mtdtor_param par;
558 558
@@ -568,14 +568,14 @@ static void cleanup_match(struct ipt_entry_match *m, struct net *net)
568static int 568static int
569check_entry(const struct ipt_entry *e, const char *name) 569check_entry(const struct ipt_entry *e, const char *name)
570{ 570{
571 const struct ipt_entry_target *t; 571 const struct xt_entry_target *t;
572 572
573 if (!ip_checkentry(&e->ip)) { 573 if (!ip_checkentry(&e->ip)) {
574 duprintf("ip check failed %p %s.\n", e, par->match->name); 574 duprintf("ip check failed %p %s.\n", e, par->match->name);
575 return -EINVAL; 575 return -EINVAL;
576 } 576 }
577 577
578 if (e->target_offset + sizeof(struct ipt_entry_target) > 578 if (e->target_offset + sizeof(struct xt_entry_target) >
579 e->next_offset) 579 e->next_offset)
580 return -EINVAL; 580 return -EINVAL;
581 581
@@ -587,7 +587,7 @@ check_entry(const struct ipt_entry *e, const char *name)
587} 587}
588 588
589static int 589static int
590check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) 590check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
591{ 591{
592 const struct ipt_ip *ip = par->entryinfo; 592 const struct ipt_ip *ip = par->entryinfo;
593 int ret; 593 int ret;
@@ -605,7 +605,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
605} 605}
606 606
607static int 607static int
608find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) 608find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
609{ 609{
610 struct xt_match *match; 610 struct xt_match *match;
611 int ret; 611 int ret;
@@ -630,7 +630,7 @@ err:
630 630
631static int check_target(struct ipt_entry *e, struct net *net, const char *name) 631static int check_target(struct ipt_entry *e, struct net *net, const char *name)
632{ 632{
633 struct ipt_entry_target *t = ipt_get_target(e); 633 struct xt_entry_target *t = ipt_get_target(e);
634 struct xt_tgchk_param par = { 634 struct xt_tgchk_param par = {
635 .net = net, 635 .net = net,
636 .table = name, 636 .table = name,
@@ -656,7 +656,7 @@ static int
656find_check_entry(struct ipt_entry *e, struct net *net, const char *name, 656find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
657 unsigned int size) 657 unsigned int size)
658{ 658{
659 struct ipt_entry_target *t; 659 struct xt_entry_target *t;
660 struct xt_target *target; 660 struct xt_target *target;
661 int ret; 661 int ret;
662 unsigned int j; 662 unsigned int j;
@@ -707,7 +707,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
707 707
708static bool check_underflow(const struct ipt_entry *e) 708static bool check_underflow(const struct ipt_entry *e)
709{ 709{
710 const struct ipt_entry_target *t; 710 const struct xt_entry_target *t;
711 unsigned int verdict; 711 unsigned int verdict;
712 712
713 if (!unconditional(&e->ip)) 713 if (!unconditional(&e->ip))
@@ -715,7 +715,7 @@ static bool check_underflow(const struct ipt_entry *e)
715 t = ipt_get_target_c(e); 715 t = ipt_get_target_c(e);
716 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 716 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
717 return false; 717 return false;
718 verdict = ((struct ipt_standard_target *)t)->verdict; 718 verdict = ((struct xt_standard_target *)t)->verdict;
719 verdict = -verdict - 1; 719 verdict = -verdict - 1;
720 return verdict == NF_DROP || verdict == NF_ACCEPT; 720 return verdict == NF_DROP || verdict == NF_ACCEPT;
721} 721}
@@ -738,7 +738,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
738 } 738 }
739 739
740 if (e->next_offset 740 if (e->next_offset
741 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) { 741 < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
742 duprintf("checking: element %p size %u\n", 742 duprintf("checking: element %p size %u\n",
743 e, e->next_offset); 743 e, e->next_offset);
744 return -EINVAL; 744 return -EINVAL;
@@ -771,7 +771,7 @@ static void
 cleanup_entry(struct ipt_entry *e, struct net *net)
 {
 	struct xt_tgdtor_param par;
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_entry_match *ematch;

 	/* Cleanup all matches */
@@ -972,8 +972,8 @@ copy_entries_to_user(unsigned int total_size,
 	/* ... then go back and fix counters and names */
 	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
 		unsigned int i;
-		const struct ipt_entry_match *m;
-		const struct ipt_entry_target *t;
+		const struct xt_entry_match *m;
+		const struct xt_entry_target *t;

 		e = (struct ipt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
@@ -990,7 +990,7 @@ copy_entries_to_user(unsigned int total_size,
 			m = (void *)e + i;

 			if (copy_to_user(userptr + off + i
-					 + offsetof(struct ipt_entry_match,
+					 + offsetof(struct xt_entry_match,
 						    u.user.name),
 					 m->u.kernel.match->name,
 					 strlen(m->u.kernel.match->name)+1)
@@ -1002,7 +1002,7 @@ copy_entries_to_user(unsigned int total_size,

 		t = ipt_get_target_c(e);
 		if (copy_to_user(userptr + off + e->target_offset
-				 + offsetof(struct ipt_entry_target,
+				 + offsetof(struct xt_entry_target,
 					    u.user.name),
 				 t->u.kernel.target->name,
 				 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1040,7 +1040,7 @@ static int compat_calc_entry(const struct ipt_entry *e,
 			     const void *base, struct xt_table_info *newinfo)
 {
 	const struct xt_entry_match *ematch;
-	const struct ipt_entry_target *t;
+	const struct xt_entry_target *t;
 	unsigned int entry_offset;
 	int off, i, ret;

@@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info,
 static int get_info(struct net *net, void __user *user,
 		    const int *len, int compat)
 {
-	char name[IPT_TABLE_MAXNAMELEN];
+	char name[XT_TABLE_MAXNAMELEN];
 	struct xt_table *t;
 	int ret;

@@ -1105,7 +1105,7 @@ static int get_info(struct net *net, void __user *user,
 	if (copy_from_user(name, user, sizeof(name)) != 0)
 		return -EFAULT;

-	name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+	name[XT_TABLE_MAXNAMELEN-1] = '\0';
 #ifdef CONFIG_COMPAT
 	if (compat)
 		xt_compat_lock(AF_INET);
@@ -1124,6 +1124,7 @@ static int get_info(struct net *net, void __user *user,
 		private = &tmp;
 	}
 #endif
+	memset(&info, 0, sizeof(info));
 	info.valid_hooks = t->valid_hooks;
 	memcpy(info.hook_entry, private->hook_entry,
 	       sizeof(info.hook_entry));
@@ -1400,14 +1401,14 @@ do_add_counters(struct net *net, const void __user *user,

 #ifdef CONFIG_COMPAT
 struct compat_ipt_replace {
-	char name[IPT_TABLE_MAXNAMELEN];
+	char name[XT_TABLE_MAXNAMELEN];
 	u32 valid_hooks;
 	u32 num_entries;
 	u32 size;
 	u32 hook_entry[NF_INET_NUMHOOKS];
 	u32 underflow[NF_INET_NUMHOOKS];
 	u32 num_counters;
-	compat_uptr_t counters;	/* struct ipt_counters * */
+	compat_uptr_t counters;	/* struct xt_counters * */
 	struct compat_ipt_entry entries[0];
 };

@@ -1416,7 +1417,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 			  unsigned int *size, struct xt_counters *counters,
 			  unsigned int i)
 {
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct compat_ipt_entry __user *ce;
 	u_int16_t target_offset, next_offset;
 	compat_uint_t origsize;
@@ -1451,7 +1452,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 }

 static int
-compat_find_calc_match(struct ipt_entry_match *m,
+compat_find_calc_match(struct xt_entry_match *m,
 		       const char *name,
 		       const struct ipt_ip *ip,
 		       unsigned int hookmask,
@@ -1473,7 +1474,7 @@ compat_find_calc_match(struct ipt_entry_match *m,

 static void compat_release_entry(struct compat_ipt_entry *e)
 {
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_entry_match *ematch;

 	/* Cleanup all matches */
@@ -1494,7 +1495,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 				  const char *name)
 {
 	struct xt_entry_match *ematch;
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_target *target;
 	unsigned int entry_offset;
 	unsigned int j;
@@ -1576,7 +1577,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 			    unsigned int *size, const char *name,
 			    struct xt_table_info *newinfo, unsigned char *base)
 {
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_target *target;
 	struct ipt_entry *de;
 	unsigned int origsize;
@@ -1884,7 +1885,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
 }

 struct compat_ipt_get_entries {
-	char name[IPT_TABLE_MAXNAMELEN];
+	char name[XT_TABLE_MAXNAMELEN];
 	compat_uint_t size;
 	struct compat_ipt_entry entrytable[0];
 };
@@ -2039,7 +2040,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)

 	case IPT_SO_GET_REVISION_MATCH:
 	case IPT_SO_GET_REVISION_TARGET: {
-		struct ipt_get_revision rev;
+		struct xt_get_revision rev;
 		int target;

 		if (*len != sizeof(rev)) {
@@ -2176,7 +2177,7 @@ static int icmp_checkentry(const struct xt_mtchk_param *par)

 static struct xt_target ipt_builtin_tg[] __read_mostly = {
 	{
-		.name		= IPT_STANDARD_TARGET,
+		.name		= XT_STANDARD_TARGET,
 		.targetsize	= sizeof(int),
 		.family		= NFPROTO_IPV4,
 #ifdef CONFIG_COMPAT
@@ -2186,9 +2187,9 @@ static struct xt_target ipt_builtin_tg[] __read_mostly = {
 #endif
 	},
 	{
-		.name		= IPT_ERROR_TARGET,
+		.name		= XT_ERROR_TARGET,
 		.target		= ipt_error,
-		.targetsize	= IPT_FUNCTION_MAXNAMELEN,
+		.targetsize	= XT_FUNCTION_MAXNAMELEN,
 		.family		= NFPROTO_IPV4,
 	},
 };
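
Note: the ipt_entry_target/ipt_entry_match and IPT_*NAMELEN conversions in this file are mechanical. The ipt_* spellings are compatibility aliases for the shared xtables structures, so the structure layout and the offsetof() computations into u.user.name are unchanged; only the canonical names move into the xt_* namespace. A hedged sketch of the alias layer this relies on (a reconstruction for illustration only; the authoritative compat definitions live in include/linux/netfilter_ipv4/ip_tables.h and are not part of this hunk):

/* Illustrative reconstruction -- check ip_tables.h in the matching
 * tree for the real definitions. */
#include <linux/netfilter/x_tables.h>

#define ipt_entry_target	xt_entry_target
#define ipt_entry_match		xt_entry_match
#define IPT_TABLE_MAXNAMELEN	XT_TABLE_MAXNAMELEN
#define IPT_FUNCTION_MAXNAMELEN	XT_FUNCTION_MAXNAMELEN
#define IPT_STANDARD_TARGET	XT_STANDARD_TARGET
#define IPT_ERROR_TARGET	XT_ERROR_TARGET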
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a43cf36db87..1e26a4897655 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/net_namespace.h>
 #include <net/checksum.h>
+#include <net/ip.h>

 #define CLUSTERIP_VERSION "0.8"

@@ -231,24 +232,22 @@ clusterip_hashfn(const struct sk_buff *skb,
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	unsigned long hashval;
-	u_int16_t sport, dport;
-	const u_int16_t *ports;
+	u_int16_t sport = 0, dport = 0;
+	int poff;

-	switch (iph->protocol) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-	case IPPROTO_UDPLITE:
-	case IPPROTO_SCTP:
-	case IPPROTO_DCCP:
-	case IPPROTO_ICMP:
-		ports = (const void *)iph+iph->ihl*4;
-		sport = ports[0];
-		dport = ports[1];
-		break;
-	default:
+	poff = proto_ports_offset(iph->protocol);
+	if (poff >= 0) {
+		const u_int16_t *ports;
+		u16 _ports[2];
+
+		ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
+		if (ports) {
+			sport = ports[0];
+			dport = ports[1];
+		}
+	} else {
 		if (net_ratelimit())
 			pr_info("unknown protocol %u\n", iph->protocol);
-		sport = dport = 0;
 	}

 	switch (config->hash_mode) {
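
Note: the old clusterip_hashfn() read the ports by casting directly into the packet at iph->ihl * 4, which is unsafe on short or non-linear skbs; the new code asks proto_ports_offset() (from <net/ip.h>, hence the new include) where the ports sit for this protocol and copies them out with skb_header_pointer(). A minimal sketch of that pattern, wrapped in a hypothetical helper:

/* Sketch of the safe port-extraction pattern used above; get_ports() is an
 * illustrative name, not from the patch. skb_header_pointer() copies into
 * the local buffer when the range is outside the linear area. */
#include <linux/skbuff.h>
#include <net/ip.h>

static bool get_ports(const struct sk_buff *skb, const struct iphdr *iph,
		      u16 *sport, u16 *dport)
{
	int poff = proto_ports_offset(iph->protocol);
	u16 _ports[2];
	const u16 *ports;

	if (poff < 0)
		return false;	/* protocol without a port pair */
	ports = skb_header_pointer(skb, iph->ihl * 4 + poff,
				   sizeof(_ports), _ports);
	if (!ports)
		return false;	/* truncated packet */
	*sport = ports[0];	/* network byte order, as in the hash above */
	*dport = ports[1];
	return true;
}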
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 915fc17d7ce2..72ffc8fda2e9 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -24,16 +24,15 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_LOG.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netfilter/xt_log.h>

 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");

-/* Use lock to serialize, so printks don't overlap */
-static DEFINE_SPINLOCK(log_lock);
-
 /* One level of recursion won't kill us */
-static void dump_packet(const struct nf_loginfo *info,
+static void dump_packet(struct sbuff *m,
+			const struct nf_loginfo *info,
 			const struct sk_buff *skb,
 			unsigned int iphoff)
 {
@@ -48,32 +47,32 @@ static void dump_packet(const struct nf_loginfo *info,

 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
-		printk("TRUNCATED");
+		sb_add(m, "TRUNCATED");
 		return;
 	}

 	/* Important fields:
 	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
 	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
-	printk("SRC=%pI4 DST=%pI4 ",
+	sb_add(m, "SRC=%pI4 DST=%pI4 ",
 	       &ih->saddr, &ih->daddr);

 	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
-	printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+	sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
 	       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
 	       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));

 	/* Max length: 6 "CE DF MF " */
 	if (ntohs(ih->frag_off) & IP_CE)
-		printk("CE ");
+		sb_add(m, "CE ");
 	if (ntohs(ih->frag_off) & IP_DF)
-		printk("DF ");
+		sb_add(m, "DF ");
 	if (ntohs(ih->frag_off) & IP_MF)
-		printk("MF ");
+		sb_add(m, "MF ");

 	/* Max length: 11 "FRAG:65535 " */
 	if (ntohs(ih->frag_off) & IP_OFFSET)
-		printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+		sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);

 	if ((logflags & IPT_LOG_IPOPT) &&
 	    ih->ihl * 4 > sizeof(struct iphdr)) {
@@ -85,15 +84,15 @@ static void dump_packet(const struct nf_loginfo *info,
 		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
 					optsize, _opt);
 		if (op == NULL) {
-			printk("TRUNCATED");
+			sb_add(m, "TRUNCATED");
 			return;
 		}

 		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		printk("OPT (");
+		sb_add(m, "OPT (");
 		for (i = 0; i < optsize; i++)
-			printk("%02X", op[i]);
-		printk(") ");
+			sb_add(m, "%02X", op[i]);
+		sb_add(m, ") ");
 	}

 	switch (ih->protocol) {
@@ -102,7 +101,7 @@ static void dump_packet(const struct nf_loginfo *info,
 		const struct tcphdr *th;

 		/* Max length: 10 "PROTO=TCP " */
-		printk("PROTO=TCP ");
+		sb_add(m, "PROTO=TCP ");

 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -111,41 +110,41 @@ static void dump_packet(const struct nf_loginfo *info,
 		th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					sizeof(_tcph), &_tcph);
 		if (th == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}

 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u ",
+		sb_add(m, "SPT=%u DPT=%u ",
 		       ntohs(th->source), ntohs(th->dest));
 		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
 		if (logflags & IPT_LOG_TCPSEQ)
-			printk("SEQ=%u ACK=%u ",
+			sb_add(m, "SEQ=%u ACK=%u ",
 			       ntohl(th->seq), ntohl(th->ack_seq));
 		/* Max length: 13 "WINDOW=65535 " */
-		printk("WINDOW=%u ", ntohs(th->window));
+		sb_add(m, "WINDOW=%u ", ntohs(th->window));
 		/* Max length: 9 "RES=0x3F " */
-		printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
 		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
 		if (th->cwr)
-			printk("CWR ");
+			sb_add(m, "CWR ");
 		if (th->ece)
-			printk("ECE ");
+			sb_add(m, "ECE ");
 		if (th->urg)
-			printk("URG ");
+			sb_add(m, "URG ");
 		if (th->ack)
-			printk("ACK ");
+			sb_add(m, "ACK ");
 		if (th->psh)
-			printk("PSH ");
+			sb_add(m, "PSH ");
 		if (th->rst)
-			printk("RST ");
+			sb_add(m, "RST ");
 		if (th->syn)
-			printk("SYN ");
+			sb_add(m, "SYN ");
 		if (th->fin)
-			printk("FIN ");
+			sb_add(m, "FIN ");
 		/* Max length: 11 "URGP=65535 " */
-		printk("URGP=%u ", ntohs(th->urg_ptr));
+		sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));

 		if ((logflags & IPT_LOG_TCPOPT) &&
 		    th->doff * 4 > sizeof(struct tcphdr)) {
@@ -158,15 +157,15 @@ static void dump_packet(const struct nf_loginfo *info,
 					iphoff+ih->ihl*4+sizeof(_tcph),
 					optsize, _opt);
 			if (op == NULL) {
-				printk("TRUNCATED");
+				sb_add(m, "TRUNCATED");
 				return;
 			}

 			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			printk("OPT (");
+			sb_add(m, "OPT (");
 			for (i = 0; i < optsize; i++)
-				printk("%02X", op[i]);
-			printk(") ");
+				sb_add(m, "%02X", op[i]);
+			sb_add(m, ") ");
 		}
 		break;
 	}
@@ -177,9 +176,9 @@ static void dump_packet(const struct nf_loginfo *info,

 		if (ih->protocol == IPPROTO_UDP)
 			/* Max length: 10 "PROTO=UDP " */
-			printk("PROTO=UDP " );
+			sb_add(m, "PROTO=UDP " );
 		else	/* Max length: 14 "PROTO=UDPLITE " */
-			printk("PROTO=UDPLITE ");
+			sb_add(m, "PROTO=UDPLITE ");

 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -188,13 +187,13 @@ static void dump_packet(const struct nf_loginfo *info,
 		uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_udph), &_udph);
 		if (uh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}

 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u LEN=%u ",
+		sb_add(m, "SPT=%u DPT=%u LEN=%u ",
 		       ntohs(uh->source), ntohs(uh->dest),
 		       ntohs(uh->len));
 		break;
@@ -221,7 +220,7 @@ static void dump_packet(const struct nf_loginfo *info,
 			  [ICMP_ADDRESSREPLY] = 12 };

 		/* Max length: 11 "PROTO=ICMP " */
-		printk("PROTO=ICMP ");
+		sb_add(m, "PROTO=ICMP ");

 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -230,19 +229,19 @@ static void dump_packet(const struct nf_loginfo *info,
 		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					 sizeof(_icmph), &_icmph);
 		if (ich == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}

 		/* Max length: 18 "TYPE=255 CODE=255 " */
-		printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+		sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);

 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		if (ich->type <= NR_ICMP_TYPES &&
 		    required_len[ich->type] &&
 		    skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
@@ -251,35 +250,35 @@ static void dump_packet(const struct nf_loginfo *info,
 		case ICMP_ECHOREPLY:
 		case ICMP_ECHO:
 			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			printk("ID=%u SEQ=%u ",
+			sb_add(m, "ID=%u SEQ=%u ",
 			       ntohs(ich->un.echo.id),
 			       ntohs(ich->un.echo.sequence));
 			break;

 		case ICMP_PARAMETERPROB:
 			/* Max length: 14 "PARAMETER=255 " */
-			printk("PARAMETER=%u ",
+			sb_add(m, "PARAMETER=%u ",
 			       ntohl(ich->un.gateway) >> 24);
 			break;
 		case ICMP_REDIRECT:
 			/* Max length: 24 "GATEWAY=255.255.255.255 " */
-			printk("GATEWAY=%pI4 ", &ich->un.gateway);
+			sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
 			/* Fall through */
 		case ICMP_DEST_UNREACH:
 		case ICMP_SOURCE_QUENCH:
 		case ICMP_TIME_EXCEEDED:
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
-				printk("[");
-				dump_packet(info, skb,
+				sb_add(m, "[");
+				dump_packet(m, info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
-				printk("] ");
+				sb_add(m, "] ");
 			}

 			/* Max length: 10 "MTU=65535 " */
 			if (ich->type == ICMP_DEST_UNREACH &&
 			    ich->code == ICMP_FRAG_NEEDED)
-				printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+				sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
 		}
 		break;
 	}
@@ -292,19 +291,19 @@ static void dump_packet(const struct nf_loginfo *info,
 			break;

 		/* Max length: 9 "PROTO=AH " */
-		printk("PROTO=AH ");
+		sb_add(m, "PROTO=AH ");

 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_ahdr), &_ahdr);
 		if (ah == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}

 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(ah->spi));
+		sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
 		break;
 	}
 	case IPPROTO_ESP: {
@@ -312,7 +311,7 @@ static void dump_packet(const struct nf_loginfo *info,
 		const struct ip_esp_hdr *eh;

 		/* Max length: 10 "PROTO=ESP " */
-		printk("PROTO=ESP ");
+		sb_add(m, "PROTO=ESP ");

 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -321,25 +320,25 @@ static void dump_packet(const struct nf_loginfo *info,
 		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_esph), &_esph);
 		if (eh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}

 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(eh->spi));
+		sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
 		break;
 	}
 	/* Max length: 10 "PROTO 255 " */
 	default:
-		printk("PROTO=%u ", ih->protocol);
+		sb_add(m, "PROTO=%u ", ih->protocol);
 	}

 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
-			printk("UID=%u GID=%u ",
+			sb_add(m, "UID=%u GID=%u ",
 			       skb->sk->sk_socket->file->f_cred->fsuid,
 			       skb->sk->sk_socket->file->f_cred->fsgid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
@@ -347,7 +346,7 @@ static void dump_packet(const struct nf_loginfo *info,

 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (!iphoff && skb->mark)
-		printk("MARK=0x%x ", skb->mark);
+		sb_add(m, "MARK=0x%x ", skb->mark);

 	/* Proto    Max log string length */
 	/* IP:      40+46+6+11+127 = 230 */
@@ -364,7 +363,8 @@ static void dump_packet(const struct nf_loginfo *info,
 	/* maxlen = 230+   91  + 230 + 252 = 803 */
 }

-static void dump_mac_header(const struct nf_loginfo *info,
+static void dump_mac_header(struct sbuff *m,
+			    const struct nf_loginfo *info,
 			    const struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -378,7 +378,7 @@ static void dump_mac_header(const struct nf_loginfo *info,

 	switch (dev->type) {
 	case ARPHRD_ETHER:
-		printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+		sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
 		       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
 		       ntohs(eth_hdr(skb)->h_proto));
 		return;
@@ -387,17 +387,17 @@ static void dump_mac_header(const struct nf_loginfo *info,
 	}

 fallback:
-	printk("MAC=");
+	sb_add(m, "MAC=");
 	if (dev->hard_header_len &&
 	    skb->mac_header != skb->network_header) {
 		const unsigned char *p = skb_mac_header(skb);
 		unsigned int i;

-		printk("%02x", *p++);
+		sb_add(m, "%02x", *p++);
 		for (i = 1; i < dev->hard_header_len; i++, p++)
-			printk(":%02x", *p);
+			sb_add(m, ":%02x", *p);
 	}
-	printk(" ");
+	sb_add(m, " ");
 }

 static struct nf_loginfo default_loginfo = {
@@ -419,11 +419,12 @@ ipt_log_packet(u_int8_t pf,
 	       const struct nf_loginfo *loginfo,
 	       const char *prefix)
 {
+	struct sbuff *m = sb_open();
+
 	if (!loginfo)
 		loginfo = &default_loginfo;

-	spin_lock_bh(&log_lock);
-	printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+	sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
 	       prefix,
 	       in ? in->name : "",
 	       out ? out->name : "");
@@ -434,20 +435,20 @@ ipt_log_packet(u_int8_t pf,

 		physindev = skb->nf_bridge->physindev;
 		if (physindev && in != physindev)
-			printk("PHYSIN=%s ", physindev->name);
+			sb_add(m, "PHYSIN=%s ", physindev->name);
 		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev && out != physoutdev)
-			printk("PHYSOUT=%s ", physoutdev->name);
+			sb_add(m, "PHYSOUT=%s ", physoutdev->name);
 	}
 #endif

 	/* MAC logging for input path only. */
 	if (in && !out)
-		dump_mac_header(loginfo, skb);
+		dump_mac_header(m, loginfo, skb);
+
+	dump_packet(m, loginfo, skb, 0);

-	dump_packet(loginfo, skb, 0);
-	printk("\n");
-	spin_unlock_bh(&log_lock);
+	sb_close(m);
 }

 static unsigned int
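
Note: this conversion removes the global log_lock and the line-at-a-time printk() calls; each invocation now appends to a per-CPU string buffer and emits the assembled line once, so concurrent CPUs can no longer interleave log fragments. A minimal sketch of the call shape, assuming the sb_open()/sb_add()/sb_close() helpers from <net/netfilter/xt_log.h> (function and parameter names below are illustrative):

#include <net/netfilter/xt_log.h>

static void log_addrs(int level, const char *prefix,
		      __be32 saddr, __be32 daddr)
{
	struct sbuff *m = sb_open();	/* per-CPU buffer, no global lock */

	sb_add(m, "<%d>%s", level, prefix);
	sb_add(m, "SRC=%pI4 DST=%pI4 ", &saddr, &daddr);
	/* ...every former printk() appends to the same buffer... */
	sb_close(m);			/* single printk of the whole line */
}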
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index c31b87668250..0f23b3f06df0 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,9 +44,16 @@ static unsigned int help(struct sk_buff *skb,

 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}

 	if (port == 0)
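
Note: nf_ct_expect_related() can fail for reasons other than a port collision; only -EBUSY means "this tuple is taken, try the next port". The loop therefore keeps scanning on -EBUSY but aborts (port = 0, treated as hard failure) on any other error. The same shape is applied to the ftp, h323, irc and sip helpers below; condensed into a sketch:

/* Condensed form of the retry loop used by the NAT helpers in this series;
 * pick_port() is an illustrative name, not from the patch. */
#include <net/netfilter/nf_conntrack_expect.h>

static u16 pick_port(struct nf_conntrack_expect *exp, u16 first)
{
	u16 port;

	for (port = first; port != 0; port++) {
		int ret;

		exp->tuple.dst.u.tcp.port = htons(port);
		ret = nf_ct_expect_related(exp);
		if (ret == 0)
			return port;	/* expectation registered */
		if (ret != -EBUSY)
			return 0;	/* hard error: stop scanning */
	}
	return 0;			/* wrapped around: nothing free */
}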
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 957c9241fb0c..c04787ce1a71 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,28 +47,6 @@ __nf_nat_proto_find(u_int8_t protonum)
 	return rcu_dereference(nf_nat_protos[protonum]);
 }

-const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
-	const struct nf_nat_protocol *p;
-
-	rcu_read_lock();
-	p = __nf_nat_proto_find(protonum);
-	if (!try_module_get(p->me))
-		p = &nf_nat_unknown_protocol;
-	rcu_read_unlock();
-
-	return p;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
-
-void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
-	module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_put);
-
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
 hash_by_src(const struct net *net, u16 zone,
@@ -262,11 +240,17 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);

 	/* Only bother mapping if it's not already in range and unique */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
-	    (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
-	     proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
-	    !nf_nat_used_tuple(tuple, ct))
-		goto out;
+	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+		if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+			if (proto->in_range(tuple, maniptype, &range->min,
+					    &range->max) &&
+			    (range->min.all == range->max.all ||
+			     !nf_nat_used_tuple(tuple, ct)))
+				goto out;
+		} else if (!nf_nat_used_tuple(tuple, ct)) {
+			goto out;
+		}
+	}

 	/* Last change: get protocol to try to obtain unique tuple. */
 	proto->unique_tuple(tuple, range, maniptype, ct);
@@ -458,6 +442,18 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 		return 0;
 	}

+	if (manip == IP_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply dir. */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
 	pr_debug("icmp_reply_translation: translating error %p manip %u "
 		 "dir %s\n", skb, manip,
 		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
@@ -492,20 +488,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,

 	/* Change outer to look the reply to an incoming packet
 	 * (proto 0 means don't invert per-proto part). */
-	if (manip == IP_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-		if (!manip_pkt(0, skb, 0, &target, manip))
-			return 0;
-	}
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	if (!manip_pkt(0, skb, 0, &target, manip))
+		return 0;

 	return 1;
 }
@@ -583,6 +568,26 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>

+static const struct nf_nat_protocol *
+nf_nat_proto_find_get(u_int8_t protonum)
+{
+	const struct nf_nat_protocol *p;
+
+	rcu_read_lock();
+	p = __nf_nat_proto_find(protonum);
+	if (!try_module_get(p->me))
+		p = &nf_nat_unknown_protocol;
+	rcu_read_unlock();
+
+	return p;
+}
+
+static void
+nf_nat_proto_put(const struct nf_nat_protocol *p)
+{
+	module_put(p->me);
+}
+
 static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
 	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
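
Note: hoisting the statusbit test to the top of nf_nat_icmp_reply_translation() lets packets whose conntrack entry was never NATed in that direction return early, before either the inner or the outer header is rewritten. The direction logic, pulled out into a hypothetical helper for clarity (not part of the patch):

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_nat.h>

static bool dir_needs_nat(const struct nf_conn *ct,
			  enum nf_nat_manip_type manip,
			  enum ip_conntrack_dir dir)
{
	unsigned long statusbit;

	statusbit = (manip == IP_NAT_MANIP_SRC) ? IPS_SRC_NAT : IPS_DST_NAT;
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;	/* swap SRC/DST for replies */
	return ct->status & statusbit;
}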
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 86e0e84ff0a0..dc73abb3fe27 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -79,9 +79,16 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,

 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}

 	if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 5045196d853c..790f3160e012 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -222,13 +222,24 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	/* Try to get a pair of ports. */
 	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
 	     nated_port != 0; nated_port += 2) {
+		int ret;
+
 		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		if (nf_ct_expect_related(rtp_exp) == 0) {
+		ret = nf_ct_expect_related(rtp_exp);
+		if (ret == 0) {
 			rtcp_exp->tuple.dst.u.udp.port =
 				htons(nated_port + 1);
-			if (nf_ct_expect_related(rtcp_exp) == 0)
+			ret = nf_ct_expect_related(rtcp_exp);
+			if (ret == 0)
+				break;
+			else if (ret != -EBUSY) {
+				nf_ct_unexpect_related(rtp_exp);
+				nated_port = 0;
 				break;
-			nf_ct_unexpect_related(rtp_exp);
+			}
+		} else if (ret != -EBUSY) {
+			nated_port = 0;
+			break;
 		}
 	}

@@ -284,9 +295,16 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,

 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
 			break;
+		}
 	}

 	if (nated_port == 0) {	/* No port available */
@@ -334,9 +352,16 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,

 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
 			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
+			break;
+		}
 	}

 	if (nated_port == 0) {	/* No port available */
@@ -418,9 +443,16 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,

 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
 			break;
+		}
 	}

 	if (nated_port == 0) {	/* No port available */
@@ -500,9 +532,16 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,

 	/* Try to get same port: if not, try to change it. */
 	for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
 			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
+			break;
+		}
 	}

 	if (nated_port == 0) {	/* No port available */
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 4a0c6b548eee..31427fb57aa8 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,6 +153,35 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 }
 EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);

+static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data,
+			int datalen, __sum16 *check, int oldlen)
+{
+	struct rtable *rt = skb_rtable(skb);
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    skb->dev->features & NETIF_F_V4_CSUM) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  iph->ihl * 4;
+			skb->csum_offset = (void *)check - data;
+			*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+						    datalen, iph->protocol, 0);
+		} else {
+			*check = 0;
+			*check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+						   datalen, iph->protocol,
+						   csum_partial(data, datalen,
+								0));
+			if (iph->protocol == IPPROTO_UDP && !*check)
+				*check = CSUM_MANGLED_0;
+		}
+	} else
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), 1);
+}
+
 /* Generic function for mangling variable-length address changes inside
  * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
  * command in FTP).
@@ -169,7 +198,6 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			       const char *rep_buffer,
 			       unsigned int rep_len, bool adjust)
 {
-	struct rtable *rt = skb_rtable(skb);
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
@@ -192,26 +220,7 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			 match_offset, match_len, rep_buffer, rep_len);

 	datalen = skb->len - iph->ihl*4;
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    skb->dev->features & NETIF_F_V4_CSUM) {
-			skb->ip_summed = CHECKSUM_PARTIAL;
-			skb->csum_start = skb_headroom(skb) +
-					  skb_network_offset(skb) +
-					  iph->ihl * 4;
-			skb->csum_offset = offsetof(struct tcphdr, check);
-			tcph->check = ~tcp_v4_check(datalen,
-						    iph->saddr, iph->daddr, 0);
-		} else {
-			tcph->check = 0;
-			tcph->check = tcp_v4_check(datalen,
-						   iph->saddr, iph->daddr,
-						   csum_partial(tcph,
-								datalen, 0));
-		}
-	} else
-		inet_proto_csum_replace2(&tcph->check, skb,
-					 htons(oldlen), htons(datalen), 1);
+	nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen);

 	if (adjust && rep_len != match_len)
 		nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
@@ -240,7 +249,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = skb_rtable(skb);
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
@@ -274,29 +282,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 		return 1;

-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    skb->dev->features & NETIF_F_V4_CSUM) {
-			skb->ip_summed = CHECKSUM_PARTIAL;
-			skb->csum_start = skb_headroom(skb) +
-					  skb_network_offset(skb) +
-					  iph->ihl * 4;
-			skb->csum_offset = offsetof(struct udphdr, check);
-			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 datalen, IPPROTO_UDP,
-							 0);
-		} else {
-			udph->check = 0;
-			udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-							datalen, IPPROTO_UDP,
-							csum_partial(udph,
-								     datalen, 0));
-			if (!udph->check)
-				udph->check = CSUM_MANGLED_0;
-		}
-	} else
-		inet_proto_csum_replace2(&udph->check, skb,
-					 htons(oldlen), htons(datalen), 1);
+	nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen);

 	return 1;
 }
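
Note: the TCP and UDP mangling paths shared near-identical checksum fixups, so they are folded into nf_nat_csum() above. The helper covers three cases: arm CHECKSUM_PARTIAL when the device can finish the checksum (NETIF_F_V4_CSUM, non-local route); otherwise compute a full software checksum, restoring CSUM_MANGLED_0 for UDP's zero-means-no-checksum rule; and, if the skb was already CHECKSUM_PARTIAL, just apply an incremental length fixup. The only per-protocol knowledge it needs is where the checksum field sits, recovered from the pointer pair. A micro-example of that offset arithmetic, for illustration only:

#include <linux/tcp.h>

/* The offset of the checksum field inside the transport header,
 * computed the way nf_nat_csum() derives skb->csum_offset. */
static int tcp_check_offset(struct tcphdr *tcph)
{
	void *data = tcph;
	__sum16 *check = &tcph->check;

	return (void *)check - data;	/* == offsetof(struct tcphdr, check) */
}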
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index ea83a886b03e..535e1a802356 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -45,9 +45,16 @@ static unsigned int help(struct sk_buff *skb,

 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}

 	if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index ebbd319f62f5..21c30426480b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -106,16 +106,15 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
 	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
-	   Use reply in case it's already been mangled (eg local packet).
 	 */
-	__be32 ip
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
-		   : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
-	struct nf_nat_range range
-		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
-	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip);
+	struct nf_nat_range range;
+
+	range.flags = 0;
+	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
+		 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+
 	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
 }

diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 11b538deaaec..e40cf7816fdb 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -307,9 +307,16 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
 	exp->expectfn = ip_nat_sip_expected;

 	for (; port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.udp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}

 	if (port == 0)
@@ -480,13 +487,25 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
 	/* Try to get same pair of ports: if not, try to change them. */
 	for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
 	     port != 0; port += 2) {
+		int ret;
+
 		rtp_exp->tuple.dst.u.udp.port = htons(port);
-		if (nf_ct_expect_related(rtp_exp) != 0)
+		ret = nf_ct_expect_related(rtp_exp);
+		if (ret == -EBUSY)
 			continue;
+		else if (ret < 0) {
+			port = 0;
+			break;
+		}
 		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
-		if (nf_ct_expect_related(rtcp_exp) == 0)
+		ret = nf_ct_expect_related(rtcp_exp);
+		if (ret == 0)
 			break;
-		nf_ct_unexpect_related(rtp_exp);
+		else if (ret != -EBUSY) {
+			nf_ct_unexpect_related(rtp_exp);
+			port = 0;
+			break;
+		}
 	}

 	if (port == 0)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4ae1f203f7cb..1b48eb1ed453 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -59,13 +59,13 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	local_bh_enable();

 	socket_seq_show(seq);
-	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
+	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
 		   sock_prot_inuse_get(net, &tcp_prot), orphans,
 		   tcp_death_row.tw_count, sockets,
-		   atomic_read(&tcp_memory_allocated));
-	seq_printf(seq, "UDP: inuse %d mem %d\n",
+		   atomic_long_read(&tcp_memory_allocated));
+	seq_printf(seq, "UDP: inuse %d mem %ld\n",
 		   sock_prot_inuse_get(net, &udp_prot),
-		   atomic_read(&udp_memory_allocated));
+		   atomic_long_read(&udp_memory_allocated));
 	seq_printf(seq, "UDPLITE: inuse %d\n",
 		   sock_prot_inuse_get(net, &udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n",
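
Note: tcp_memory_allocated and udp_memory_allocated become atomic_long_t in this series so the page counters cannot overflow an int on large 64-bit machines; the %d to %ld format changes follow from that. The matching reader, as a trivial sketch (illustrative name):

#include <asm/atomic.h>

/* An atomic_long_t page counter must be read with the _long accessor
 * and printed with %ld, as in the seq_printf above. */
static long pages_allocated(atomic_long_t *counter)
{
	return atomic_long_read(counter);
}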
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index f2d297351405..9ae5c01cd0b2 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,8 +28,7 @@
 #include <linux/spinlock.h>
 #include <net/protocol.h>

-const struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
-static DEFINE_SPINLOCK(inet_proto_lock);
+const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;

 /*
  *	Add a protocol handler to the hash tables
@@ -37,20 +36,10 @@ static DEFINE_SPINLOCK(inet_proto_lock);

 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int hash, ret;
+	int hash = protocol & (MAX_INET_PROTOS - 1);

-	hash = protocol & (MAX_INET_PROTOS - 1);
-
-	spin_lock_bh(&inet_proto_lock);
-	if (inet_protos[hash]) {
-		ret = -1;
-	} else {
-		inet_protos[hash] = prot;
-		ret = 0;
-	}
-	spin_unlock_bh(&inet_proto_lock);
-
-	return ret;
+	return !cmpxchg((const struct net_protocol **)&inet_protos[hash],
+			NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet_add_protocol);

@@ -60,18 +49,10 @@ EXPORT_SYMBOL(inet_add_protocol);

 int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int hash, ret;
-
-	hash = protocol & (MAX_INET_PROTOS - 1);
+	int ret, hash = protocol & (MAX_INET_PROTOS - 1);

-	spin_lock_bh(&inet_proto_lock);
-	if (inet_protos[hash] == prot) {
-		inet_protos[hash] = NULL;
-		ret = 0;
-	} else {
-		ret = -1;
-	}
-	spin_unlock_bh(&inet_proto_lock);
-
+	ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash],
+		       prot, NULL) == prot) ? 0 : -1;

 	synchronize_net();

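
Note: inet_add_protocol()/inet_del_protocol() drop inet_proto_lock entirely; a single cmpxchg() claims or releases the slot atomically (add succeeds only if the slot was NULL, del only if the slot still holds the protocol being removed), and synchronize_net() lets RCU readers drain before the caller may free the handler. The same lock-free pattern on a generic slot table, sketched with illustrative names:

#include <linux/netdevice.h>
#include <net/protocol.h>

static const struct net_protocol __rcu *slots[MAX_INET_PROTOS];

static int slot_register(const struct net_protocol *p, unsigned int idx)
{
	/* succeeds iff the slot was empty; otherwise someone beat us */
	return !cmpxchg((const struct net_protocol **)&slots[idx],
			NULL, p) ? 0 : -1;
}

static int slot_unregister(const struct net_protocol *p, unsigned int idx)
{
	/* succeeds iff we are still the registered owner */
	int ret = (cmpxchg((const struct net_protocol **)&slots[idx],
			   p, NULL) == p) ? 0 : -1;

	synchronize_net();	/* wait out RCU readers before freeing */
	return ret;
}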
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 009a7b2aa1ef..1f85ef289895 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,

 	ipc.addr = inet->inet_saddr;
 	ipc.opt = NULL;
-	ipc.shtx.flags = 0;
+	ipc.tx_flags = 0;
 	ipc.oif = sk->sk_bound_dev_if;

 	if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ac6559cb54f9..987bf9adb318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = {
 	.link_failure =		ipv4_link_failure,
 	.update_pmtu =		ip_rt_update_pmtu,
 	.local_out =		__ip_local_out,
-	.entries =		ATOMIC_INIT(0),
 };

 #define ECN_OR_COST(class)	TC_PRIO_##class
@@ -199,7 +198,7 @@ const __u8 ip_tos2prio[16] = {
  */

 struct rt_hash_bucket {
-	struct rtable	*chain;
+	struct rtable __rcu	*chain;
 };

 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -281,7 +280,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 	struct rtable *r = NULL;

 	for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
-		if (!rt_hash_table[st->bucket].chain)
+		if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
 			continue;
 		rcu_read_lock_bh();
 		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -301,17 +300,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;

-	r = r->dst.rt_next;
+	r = rcu_dereference_bh(r->dst.rt_next);
 	while (!r) {
 		rcu_read_unlock_bh();
 		do {
 			if (--st->bucket < 0)
 				return NULL;
-		} while (!rt_hash_table[st->bucket].chain);
+		} while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
 		rcu_read_lock_bh();
-		r = rt_hash_table[st->bucket].chain;
+		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
 	}
-	return rcu_dereference_bh(r);
+	return r;
 }

 static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)

 	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
 		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
-		   atomic_read(&ipv4_dst_ops.entries),
+		   dst_entries_get_slow(&ipv4_dst_ops),
 		   st->in_hit,
 		   st->in_slow_tot,
 		   st->in_slow_mc,
@@ -722,19 +721,23 @@ static void rt_do_flush(int process_context)
 	for (i = 0; i <= rt_hash_mask; i++) {
 		if (process_context && need_resched())
 			cond_resched();
-		rth = rt_hash_table[i].chain;
+		rth = rcu_dereference_raw(rt_hash_table[i].chain);
 		if (!rth)
 			continue;

 		spin_lock_bh(rt_hash_lock_addr(i));
 #ifdef CONFIG_NET_NS
 		{
-		struct rtable ** prev, * p;
+		struct rtable __rcu **prev;
+		struct rtable *p;

-		rth = rt_hash_table[i].chain;
+		rth = rcu_dereference_protected(rt_hash_table[i].chain,
+			lockdep_is_held(rt_hash_lock_addr(i)));

 		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail; tail = tail->dst.rt_next)
+		for (tail = rth; tail;
+		     tail = rcu_dereference_protected(tail->dst.rt_next,
+				lockdep_is_held(rt_hash_lock_addr(i))))
 			if (!rt_is_expired(tail))
 				break;
 		if (rth != tail)
@@ -742,8 +745,12 @@ static void rt_do_flush(int process_context)

 		/* call rt_free on entries after the tail requiring flush */
 		prev = &rt_hash_table[i].chain;
-		for (p = *prev; p; p = next) {
-			next = p->dst.rt_next;
+		for (p = rcu_dereference_protected(*prev,
+				lockdep_is_held(rt_hash_lock_addr(i)));
+		     p != NULL;
+		     p = next) {
+			next = rcu_dereference_protected(p->dst.rt_next,
+				lockdep_is_held(rt_hash_lock_addr(i)));
 			if (!rt_is_expired(p)) {
 				prev = &p->dst.rt_next;
 			} else {
@@ -753,14 +760,15 @@ static void rt_do_flush(int process_context)
 		}
 		}
 #else
-		rth = rt_hash_table[i].chain;
-		rt_hash_table[i].chain = NULL;
+		rth = rcu_dereference_protected(rt_hash_table[i].chain,
+			lockdep_is_held(rt_hash_lock_addr(i)));
+		rcu_assign_pointer(rt_hash_table[i].chain, NULL);
 		tail = NULL;
 #endif
 		spin_unlock_bh(rt_hash_lock_addr(i));

 		for (; rth != tail; rth = next) {
-			next = rth->dst.rt_next;
+			next = rcu_dereference_protected(rth->dst.rt_next, 1);
 			rt_free(rth);
 		}
 	}
@@ -791,7 +799,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	while (aux != rth) {
 		if (compare_hash_inputs(&aux->fl, &rth->fl))
 			return 0;
-		aux = aux->dst.rt_next;
+		aux = rcu_dereference_protected(aux->dst.rt_next, 1);
 	}
 	return ONE;
 }
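
Note: the route.c hunks annotate every hash-chain pointer __rcu so sparse can type-check the accesses: lockless readers must go through rcu_dereference_bh(), updaters holding the per-bucket lock use rcu_dereference_protected() with a lockdep expression proving the lock is held, and a literal 1 condition documents places where no concurrency is possible. Sketched on a generic list (illustrative, not the route cache itself):

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct node {
	struct node __rcu *next;
};
static DEFINE_SPINLOCK(list_lock);

/* BH-protected read side: sparse accepts only an rcu accessor here. */
static struct node *reader_next(struct node *n)
{
	return rcu_dereference_bh(n->next);
}

/* Update side: the lockdep expression documents and checks the locking. */
static struct node *writer_next(struct node *n)
{
	return rcu_dereference_protected(n->next,
					 lockdep_is_held(&list_lock));
}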
@@ -800,7 +808,8 @@ static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
-	struct rtable *rth, **rthp;
+	struct rtable *rth;
+	struct rtable __rcu **rthp;
 	unsigned long samples = 0;
 	unsigned long sum = 0, sum2 = 0;
 	unsigned long delta;
@@ -826,11 +835,12 @@ static void rt_check_expire(void)

 		samples++;

-		if (*rthp == NULL)
+		if (rcu_dereference_raw(*rthp) == NULL)
 			continue;
 		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
-		while ((rth = *rthp) != NULL) {
+		while ((rth = rcu_dereference_protected(*rthp,
+					lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
 			prefetch(rth->dst.rt_next);
 			if (rt_is_expired(rth)) {
 				*rthp = rth->dst.rt_next;
@@ -942,9 +952,11 @@ static int rt_garbage_collect(struct dst_ops *ops)
 	static unsigned long last_gc;
 	static int rover;
 	static int equilibrium;
-	struct rtable *rth, **rthp;
+	struct rtable *rth;
+	struct rtable __rcu **rthp;
 	unsigned long now = jiffies;
 	int goal;
+	int entries = dst_entries_get_fast(&ipv4_dst_ops);

 	/*
 	 * Garbage collection is pretty expensive,
@@ -954,28 +966,28 @@ static int rt_garbage_collect(struct dst_ops *ops)
 	RT_CACHE_STAT_INC(gc_total);

 	if (now - last_gc < ip_rt_gc_min_interval &&
-	    atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
+	    entries < ip_rt_max_size) {
 		RT_CACHE_STAT_INC(gc_ignored);
 		goto out;
 	}

+	entries = dst_entries_get_slow(&ipv4_dst_ops);
 	/* Calculate number of entries, which we want to expire now. */
-	goal = atomic_read(&ipv4_dst_ops.entries) -
-	       (ip_rt_gc_elasticity << rt_hash_log);
+	goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
 	if (goal <= 0) {
 		if (equilibrium < ipv4_dst_ops.gc_thresh)
 			equilibrium = ipv4_dst_ops.gc_thresh;
-		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+		goal = entries - equilibrium;
 		if (goal > 0) {
 			equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+			goal = entries - equilibrium;
 		}
 	} else {
 		/* We are in dangerous area. Try to reduce cache really
 		 * aggressively.
 		 */
 		goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
+		equilibrium = entries - goal;
 	}

 	if (now - last_gc >= ip_rt_gc_min_interval)
@@ -995,7 +1007,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
 			k = (k + 1) & rt_hash_mask;
 			rthp = &rt_hash_table[k].chain;
 			spin_lock_bh(rt_hash_lock_addr(k));
-			while ((rth = *rthp) != NULL) {
+			while ((rth = rcu_dereference_protected(*rthp,
+					lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
 				if (!rt_is_expired(rth) &&
 				    !rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
@@ -1032,14 +1045,16 @@ static int rt_garbage_collect(struct dst_ops *ops)
 		expire >>= 1;
 #if RT_CACHE_DEBUG >= 2
 		printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
-			atomic_read(&ipv4_dst_ops.entries), goal, i);
+			dst_entries_get_fast(&ipv4_dst_ops), goal, i);
 #endif

-	if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+	if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1039 goto out; 1052 goto out;
1040 } while (!in_softirq() && time_before_eq(jiffies, now)); 1053 } while (!in_softirq() && time_before_eq(jiffies, now));
1041 1054
1042 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 1055 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1056 goto out;
1057 if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
1043 goto out; 1058 goto out;
1044 if (net_ratelimit()) 1059 if (net_ratelimit())
1045 printk(KERN_WARNING "dst cache overflow\n"); 1060 printk(KERN_WARNING "dst cache overflow\n");
@@ -1049,11 +1064,12 @@ static int rt_garbage_collect(struct dst_ops *ops)
1049work_done: 1064work_done:
1050 expire += ip_rt_gc_min_interval; 1065 expire += ip_rt_gc_min_interval;
1051 if (expire > ip_rt_gc_timeout || 1066 if (expire > ip_rt_gc_timeout ||
1052 atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) 1067 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
1068 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
1053 expire = ip_rt_gc_timeout; 1069 expire = ip_rt_gc_timeout;
1054#if RT_CACHE_DEBUG >= 2 1070#if RT_CACHE_DEBUG >= 2
1055 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, 1071 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
1056 atomic_read(&ipv4_dst_ops.entries), goal, rover); 1072 dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
1057#endif 1073#endif
1058out: return 0; 1074out: return 0;
1059} 1075}
@@ -1068,7 +1084,7 @@ static int slow_chain_length(const struct rtable *head)
1068 1084
1069 while (rth) { 1085 while (rth) {
1070 length += has_noalias(head, rth); 1086 length += has_noalias(head, rth);
1071 rth = rth->dst.rt_next; 1087 rth = rcu_dereference_protected(rth->dst.rt_next, 1);
1072 } 1088 }
1073 return length >> FRACT_BITS; 1089 return length >> FRACT_BITS;
1074} 1090}
@@ -1076,9 +1092,9 @@ static int slow_chain_length(const struct rtable *head)
1076static int rt_intern_hash(unsigned hash, struct rtable *rt, 1092static int rt_intern_hash(unsigned hash, struct rtable *rt,
1077 struct rtable **rp, struct sk_buff *skb, int ifindex) 1093 struct rtable **rp, struct sk_buff *skb, int ifindex)
1078{ 1094{
1079 struct rtable *rth, **rthp; 1095 struct rtable *rth, *cand;
1096 struct rtable __rcu **rthp, **candp;
1080 unsigned long now; 1097 unsigned long now;
1081 struct rtable *cand, **candp;
1082 u32 min_score; 1098 u32 min_score;
1083 int chain_length; 1099 int chain_length;
1084 int attempts = !in_softirq(); 1100 int attempts = !in_softirq();
@@ -1102,30 +1118,31 @@ restart:
1102 * Note that we do rt_free on this new route entry, so that 1118 * Note that we do rt_free on this new route entry, so that
1103 * once its refcount hits zero, we are still able to reap it 1119 * once its refcount hits zero, we are still able to reap it
1104 * (Thanks Alexey) 1120 * (Thanks Alexey)
1105 * Note also the rt_free uses call_rcu. We don't actually 1121 * Note: To avoid expensive rcu stuff for this uncached dst,
1106 * need rcu protection here, this is just our path to get 1122 * we set DST_NOCACHE so that dst_release() can free dst without
1107 * on the route gc list. 1123 * waiting a grace period.
1108 */ 1124 */
1109 1125
1126 rt->dst.flags |= DST_NOCACHE;
1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1127 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1111 int err = arp_bind_neighbour(&rt->dst); 1128 int err = arp_bind_neighbour(&rt->dst);
1112 if (err) { 1129 if (err) {
1113 if (net_ratelimit()) 1130 if (net_ratelimit())
1114 printk(KERN_WARNING 1131 printk(KERN_WARNING
1115 "Neighbour table failure & not caching routes.\n"); 1132 "Neighbour table failure & not caching routes.\n");
1116 rt_drop(rt); 1133 ip_rt_put(rt);
1117 return err; 1134 return err;
1118 } 1135 }
1119 } 1136 }
1120 1137
1121 rt_free(rt);
1122 goto skip_hashing; 1138 goto skip_hashing;
1123 } 1139 }
1124 1140
1125 rthp = &rt_hash_table[hash].chain; 1141 rthp = &rt_hash_table[hash].chain;
1126 1142
1127 spin_lock_bh(rt_hash_lock_addr(hash)); 1143 spin_lock_bh(rt_hash_lock_addr(hash));
1128 while ((rth = *rthp) != NULL) { 1144 while ((rth = rcu_dereference_protected(*rthp,
1145 lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
1129 if (rt_is_expired(rth)) { 1146 if (rt_is_expired(rth)) {
1130 *rthp = rth->dst.rt_next; 1147 *rthp = rth->dst.rt_next;
1131 rt_free(rth); 1148 rt_free(rth);
@@ -1268,18 +1285,11 @@ skip_hashing:
1268 1285
1269void rt_bind_peer(struct rtable *rt, int create) 1286void rt_bind_peer(struct rtable *rt, int create)
1270{ 1287{
1271 static DEFINE_SPINLOCK(rt_peer_lock);
1272 struct inet_peer *peer; 1288 struct inet_peer *peer;
1273 1289
1274 peer = inet_getpeer(rt->rt_dst, create); 1290 peer = inet_getpeer(rt->rt_dst, create);
1275 1291
1276 spin_lock_bh(&rt_peer_lock); 1292 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1277 if (rt->peer == NULL) {
1278 rt->peer = peer;
1279 peer = NULL;
1280 }
1281 spin_unlock_bh(&rt_peer_lock);
1282 if (peer)
1283 inet_putpeer(peer); 1293 inet_putpeer(peer);
1284} 1294}
1285 1295
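The rt_bind_peer() hunk above drops a dedicated spinlock in favor of a single cmpxchg(). A minimal sketch of that lockless one-shot publish pattern, under the assumption that only the first writer may install a pointer (struct holder, make_obj() and put_obj() are illustrative names, not kernel API):

    struct obj;
    struct holder { struct obj *ptr; };

    struct obj *make_obj(void);    /* hypothetical constructor */
    void put_obj(struct obj *o);   /* hypothetical destructor */

    static void bind_once(struct holder *h)
    {
        struct obj *o = make_obj();

        /* Install o only if h->ptr is still NULL; cmpxchg() returns the
         * prior value, so a non-NULL result means another CPU won the
         * race and we drop our reference instead of leaking it.
         */
        if (o && cmpxchg(&h->ptr, NULL, o) != NULL)
            put_obj(o);
    }

The losing path here mirrors the inet_putpeer() call that survives in the new rt_bind_peer().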
@@ -1328,12 +1338,14 @@ EXPORT_SYMBOL(__ip_select_ident);
 
 static void rt_del(unsigned hash, struct rtable *rt)
 {
-    struct rtable **rthp, *aux;
+    struct rtable __rcu **rthp;
+    struct rtable *aux;
 
     rthp = &rt_hash_table[hash].chain;
     spin_lock_bh(rt_hash_lock_addr(hash));
     ip_rt_put(rt);
-    while ((aux = *rthp) != NULL) {
+    while ((aux = rcu_dereference_protected(*rthp,
           lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
         if (aux == rt || rt_is_expired(aux)) {
             *rthp = aux->dst.rt_next;
             rt_free(aux);
@@ -1350,7 +1362,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 {
     int i, k;
     struct in_device *in_dev = __in_dev_get_rcu(dev);
-    struct rtable *rth, **rthp;
+    struct rtable *rth;
+    struct rtable __rcu **rthp;
     __be32 skeys[2] = { saddr, 0 };
     int ikeys[2] = { dev->ifindex, 0 };
     struct netevent_redirect netevent;
@@ -1383,7 +1396,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
             unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
                         rt_genid(net));
 
-            rthp=&rt_hash_table[hash].chain;
+            rthp = &rt_hash_table[hash].chain;
 
             while ((rth = rcu_dereference(*rthp)) != NULL) {
                 struct rtable *rt;
@@ -1779,12 +1792,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
     if (rt->fl.iif == 0)
         src = rt->rt_src;
-    else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
-        src = FIB_RES_PREFSRC(res);
-        fib_res_put(&res);
-    } else
-        src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
+    else {
+        rcu_read_lock();
+        if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
+            src = FIB_RES_PREFSRC(res);
+        else
+            src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
                     RT_SCOPE_UNIVERSE);
+        rcu_read_unlock();
+    }
     memcpy(addr, &src, 4);
 }
 
@@ -2087,6 +2103,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
  * Such approach solves two big problems:
  * 1. Not simplex devices are handled properly.
  * 2. IP spoofing attempts are filtered with 100% of guarantee.
+ * called with rcu_read_lock()
  */
 
 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2108,7 +2125,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
     unsigned hash;
     __be32 spec_dst;
     int err = -EINVAL;
-    int free_res = 0;
     struct net * net = dev_net(dev);
 
     /* IP on this device is disabled. */
@@ -2124,7 +2140,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         ipv4_is_loopback(saddr))
         goto martian_source;
 
-    if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
+    if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
         goto brd_input;
 
     /* Accept zero addresses only to limited broadcast;
@@ -2133,19 +2149,18 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
     if (ipv4_is_zeronet(saddr))
         goto martian_source;
 
-    if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
-        ipv4_is_loopback(daddr))
+    if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
         goto martian_destination;
 
     /*
      *  Now we are ready to route packet.
      */
-    if ((err = fib_lookup(net, &fl, &res)) != 0) {
+    err = fib_lookup(net, &fl, &res);
+    if (err != 0) {
         if (!IN_DEV_FORWARD(in_dev))
             goto e_hostunreach;
         goto no_route;
     }
-    free_res = 1;
 
     RT_CACHE_STAT_INC(in_slow_tot);
 
@@ -2154,8 +2169,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
     if (res.type == RTN_LOCAL) {
         err = fib_validate_source(saddr, daddr, tos,
-                         net->loopback_dev->ifindex,
-                         dev, &spec_dst, &itag, skb->mark);
+                      net->loopback_dev->ifindex,
+                      dev, &spec_dst, &itag, skb->mark);
         if (err < 0)
             goto martian_source_keep_err;
         if (err)
@@ -2170,9 +2185,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         goto martian_destination;
 
     err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
-done:
-    if (free_res)
-        fib_res_put(&res);
 out:    return err;
 
 brd_input:
@@ -2232,7 +2244,7 @@ local_input:
     rth->rt_type = res.type;
     hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
     err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
-    goto done;
+    goto out;
 
 no_route:
     RT_CACHE_STAT_INC(in_no_route);
@@ -2255,21 +2267,21 @@ martian_destination:
 
 e_hostunreach:
     err = -EHOSTUNREACH;
-    goto done;
+    goto out;
 
 e_inval:
     err = -EINVAL;
-    goto done;
+    goto out;
 
 e_nobufs:
     err = -ENOBUFS;
-    goto done;
+    goto out;
 
 martian_source:
     err = -EINVAL;
 martian_source_keep_err:
     ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
-    goto done;
+    goto out;
 }
 
 int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2355,6 +2367,7 @@ skip_cache:
 }
 EXPORT_SYMBOL(ip_route_input_common);
 
+/* called with rcu_read_lock() */
 static int __mkroute_output(struct rtable **result,
                 struct fib_result *res,
                 const struct flowi *fl,
@@ -2365,53 +2378,47 @@ static int __mkroute_output(struct rtable **result,
     struct rtable *rth;
     struct in_device *in_dev;
     u32 tos = RT_FL_TOS(oldflp);
-    int err = 0;
 
-    if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+    if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
         return -EINVAL;
 
-    if (fl->fl4_dst == htonl(0xFFFFFFFF))
+    if (ipv4_is_lbcast(fl->fl4_dst))
         res->type = RTN_BROADCAST;
     else if (ipv4_is_multicast(fl->fl4_dst))
         res->type = RTN_MULTICAST;
-    else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
+    else if (ipv4_is_zeronet(fl->fl4_dst))
         return -EINVAL;
 
     if (dev_out->flags & IFF_LOOPBACK)
         flags |= RTCF_LOCAL;
 
-    /* get work reference to inet device */
-    in_dev = in_dev_get(dev_out);
+    in_dev = __in_dev_get_rcu(dev_out);
     if (!in_dev)
         return -EINVAL;
 
     if (res->type == RTN_BROADCAST) {
         flags |= RTCF_BROADCAST | RTCF_LOCAL;
-        if (res->fi) {
-            fib_info_put(res->fi);
-            res->fi = NULL;
-        }
+        res->fi = NULL;
     } else if (res->type == RTN_MULTICAST) {
-        flags |= RTCF_MULTICAST|RTCF_LOCAL;
+        flags |= RTCF_MULTICAST | RTCF_LOCAL;
         if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
                  oldflp->proto))
             flags &= ~RTCF_LOCAL;
         /* If multicast route do not exist use
-           default one, but do not gateway in this case.
-           Yes, it is hack.
+         * default one, but do not gateway in this case.
+         * Yes, it is hack.
          */
-        if (res->fi && res->prefixlen < 4) {
-            fib_info_put(res->fi);
+        if (res->fi && res->prefixlen < 4)
             res->fi = NULL;
-        }
     }
 
 
     rth = dst_alloc(&ipv4_dst_ops);
-    if (!rth) {
-        err = -ENOBUFS;
-        goto cleanup;
-    }
+    if (!rth)
+        return -ENOBUFS;
+
+    in_dev_hold(in_dev);
+    rth->idev = in_dev;
 
     atomic_set(&rth->dst.__refcnt, 1);
     rth->dst.flags= DST_HOST;
@@ -2432,7 +2439,6 @@ static int __mkroute_output(struct rtable **result,
        cache entry */
     rth->dst.dev = dev_out;
     dev_hold(dev_out);
-    rth->idev = in_dev_get(dev_out);
     rth->rt_gateway = fl->fl4_dst;
     rth->rt_spec_dst= fl->fl4_src;
 
@@ -2467,15 +2473,11 @@ static int __mkroute_output(struct rtable **result,
     rt_set_nexthop(rth, res, 0);
 
     rth->rt_flags = flags;
-
     *result = rth;
- cleanup:
-    /* release work reference to inet device */
-    in_dev_put(in_dev);
-
-    return err;
+    return 0;
 }
 
+/* called with rcu_read_lock() */
 static int ip_mkroute_output(struct rtable **rp,
                  struct fib_result *res,
                  const struct flowi *fl,
@@ -2497,6 +2499,7 @@ static int ip_mkroute_output(struct rtable **rp,
 
 /*
  * Major route resolver routine.
+ * called with rcu_read_lock();
  */
 
 static int ip_route_output_slow(struct net *net, struct rtable **rp,
@@ -2515,9 +2518,8 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                 .iif = net->loopback_dev->ifindex,
                 .oif = oldflp->oif };
     struct fib_result res;
-    unsigned flags = 0;
+    unsigned int flags = 0;
     struct net_device *dev_out = NULL;
-    int free_res = 0;
     int err;
 
 
@@ -2543,9 +2545,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 
         if (oldflp->oif == 0 &&
             (ipv4_is_multicast(oldflp->fl4_dst) ||
-             oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
+             ipv4_is_lbcast(oldflp->fl4_dst))) {
             /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-            dev_out = ip_dev_find(net, oldflp->fl4_src);
+            dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
             if (dev_out == NULL)
                 goto out;
 
@@ -2570,29 +2572,24 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 
         if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
             /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-            dev_out = ip_dev_find(net, oldflp->fl4_src);
-            if (dev_out == NULL)
+            if (!__ip_dev_find(net, oldflp->fl4_src, false))
                 goto out;
-            dev_put(dev_out);
-            dev_out = NULL;
         }
     }
 
 
     if (oldflp->oif) {
-        dev_out = dev_get_by_index(net, oldflp->oif);
+        dev_out = dev_get_by_index_rcu(net, oldflp->oif);
         err = -ENODEV;
         if (dev_out == NULL)
             goto out;
 
         /* RACE: Check return value of inet_select_addr instead. */
-        if (__in_dev_get_rtnl(dev_out) == NULL) {
-            dev_put(dev_out);
+        if (rcu_dereference(dev_out->ip_ptr) == NULL)
             goto out;   /* Wrong error code */
-        }
 
         if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
-            oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
+            ipv4_is_lbcast(oldflp->fl4_dst)) {
             if (!fl.fl4_src)
                 fl.fl4_src = inet_select_addr(dev_out, 0,
                                   RT_SCOPE_LINK);
@@ -2612,10 +2609,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
             fl.fl4_dst = fl.fl4_src;
         if (!fl.fl4_dst)
             fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
-        if (dev_out)
-            dev_put(dev_out);
         dev_out = net->loopback_dev;
-        dev_hold(dev_out);
         fl.oif = net->loopback_dev->ifindex;
         res.type = RTN_LOCAL;
         flags |= RTCF_LOCAL;
@@ -2649,23 +2643,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
             res.type = RTN_UNICAST;
             goto make_route;
         }
-        if (dev_out)
-            dev_put(dev_out);
         err = -ENETUNREACH;
         goto out;
     }
-    free_res = 1;
 
     if (res.type == RTN_LOCAL) {
         if (!fl.fl4_src)
             fl.fl4_src = fl.fl4_dst;
-        if (dev_out)
-            dev_put(dev_out);
         dev_out = net->loopback_dev;
-        dev_hold(dev_out);
         fl.oif = dev_out->ifindex;
-        if (res.fi)
-            fib_info_put(res.fi);
         res.fi = NULL;
         flags |= RTCF_LOCAL;
         goto make_route;
@@ -2682,28 +2668,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
     if (!fl.fl4_src)
         fl.fl4_src = FIB_RES_PREFSRC(res);
 
-    if (dev_out)
-        dev_put(dev_out);
     dev_out = FIB_RES_DEV(res);
-    dev_hold(dev_out);
     fl.oif = dev_out->ifindex;
 
 
 make_route:
     err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
 
-
-    if (free_res)
-        fib_res_put(&res);
-    if (dev_out)
-        dev_put(dev_out);
 out:    return err;
 }
 
 int __ip_route_output_key(struct net *net, struct rtable **rp,
               const struct flowi *flp)
 {
-    unsigned hash;
+    unsigned int hash;
+    int res;
     struct rtable *rth;
 
     if (!rt_caching(net))
@@ -2734,7 +2713,10 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
     rcu_read_unlock_bh();
 
 slow_output:
-    return ip_route_output_slow(net, rp, flp);
+    rcu_read_lock();
+    res = ip_route_output_slow(net, rp, flp);
+    rcu_read_unlock();
+    return res;
 }
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
 
@@ -2753,7 +2735,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
     .destroy       = ipv4_dst_destroy,
     .check         = ipv4_blackhole_dst_check,
     .update_pmtu   = ipv4_rt_blackhole_update_pmtu,
-    .entries       = ATOMIC_INIT(0),
 };
 
 
@@ -2798,7 +2779,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 
     dst_release(&(*rp)->dst);
     *rp = rt;
-    return (rt ? 0 : -ENOMEM);
+    return rt ? 0 : -ENOMEM;
 }
 
 int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
@@ -3323,6 +3304,12 @@ int __init ip_rt_init(void)
 
     ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
 
+    if (dst_entries_init(&ipv4_dst_ops) < 0)
+        panic("IP: failed to allocate ipv4_dst_ops counter\n");
+
+    if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
+        panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
+
     rt_hash_table = (struct rt_hash_bucket *)
         alloc_large_system_hash("IP route cache",
                     sizeof(struct rt_hash_bucket),
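Every atomic_read(&ipv4_dst_ops.entries) above becomes dst_entries_get_fast() or dst_entries_get_slow(). Assuming these are thin wrappers around a struct percpu_counter (a reading consistent with the dst_entries_init() calls added to ip_rt_init(), though the helpers themselves live outside this section), the intended two-speed check looks roughly like:

    #include <linux/percpu_counter.h>

    static struct percpu_counter entries;

    static bool over_limit(long max)
    {
        /* Fast path: a racy, possibly stale per-cpu snapshot. */
        if (percpu_counter_read(&entries) < max)
            return false;
        /* Near the limit: fold all per-cpu deltas for an exact total. */
        return percpu_counter_sum(&entries) >= max;
    }

which is the same fast-then-slow sequence rt_garbage_collect() now performs before declaring "dst cache overflow".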
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17c..e91911d7aae2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -398,7 +398,7 @@ static struct ctl_table ipv4_table[] = {
         .data       = &sysctl_tcp_mem,
         .maxlen     = sizeof(sysctl_tcp_mem),
         .mode       = 0644,
-        .proc_handler   = proc_dointvec
+        .proc_handler   = proc_doulongvec_minmax
     },
     {
         .procname   = "tcp_wmem",
@@ -602,8 +602,7 @@ static struct ctl_table ipv4_table[] = {
         .data       = &sysctl_udp_mem,
         .maxlen     = sizeof(sysctl_udp_mem),
         .mode       = 0644,
-        .proc_handler   = proc_dointvec_minmax,
-        .extra1     = &zero
+        .proc_handler   = proc_doulongvec_minmax,
     },
     {
         .procname   = "udp_rmem_min",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f115ea68a4ef..081419969485 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,7 +282,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
-int sysctl_tcp_mem[3] __read_mostly;
+long sysctl_tcp_mem[3] __read_mostly;
 int sysctl_tcp_wmem[3] __read_mostly;
 int sysctl_tcp_rmem[3] __read_mostly;
 
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(sysctl_tcp_mem);
 EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
-atomic_t tcp_memory_allocated;  /* Current allocated memory. */
+atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
 EXPORT_SYMBOL(tcp_memory_allocated);
 
 /*
@@ -2246,7 +2246,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
         /* Values greater than interface MTU won't take effect. However
          * at the point when this call is done we typically don't yet
          * know which interface is going to be used */
-        if (val < 8 || val > MAX_TCP_WINDOW) {
+        if (val < 64 || val > MAX_TCP_WINDOW) {
             err = -EINVAL;
             break;
         }
@@ -2392,7 +2392,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
         err = tp->af_specific->md5_parse(sk, optval, optlen);
         break;
 #endif
-
+    case TCP_USER_TIMEOUT:
+        /* Cap the max timeout in ms TCP will retry/retrans
+         * before giving up and aborting (ETIMEDOUT) a connection.
+         */
+        icsk->icsk_user_timeout = msecs_to_jiffies(val);
+        break;
     default:
         err = -ENOPROTOOPT;
         break;
@@ -2611,6 +2616,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
     case TCP_THIN_DUPACK:
         val = tp->thin_dupack;
         break;
+
+    case TCP_USER_TIMEOUT:
+        val = jiffies_to_msecs(icsk->icsk_user_timeout);
+        break;
     default:
         return -ENOPROTOOPT;
     }
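A hedged userspace sketch of the new option (the fallback #define guards against C libraries that predate it; the value 18 is taken to match what this patch series adds to the kernel's tcp.h):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>

    #ifndef TCP_USER_TIMEOUT
    #define TCP_USER_TIMEOUT 18    /* per this series' include/linux/tcp.h */
    #endif

    /* Abort the connection with ETIMEDOUT once transmitted data has been
     * unacknowledged for more than 'ms' milliseconds.
     */
    static int set_user_timeout(int fd, unsigned int ms)
    {
        return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &ms, sizeof(ms));
    }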
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 1eba160b72dc..00ca688d8964 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -6,7 +6,7 @@
  * The algorithm is described in:
  * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
  *  for High-Speed Networks"
- * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
+ * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
  *
  * Implemented from description in paper and ns-2 simulation.
  * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b55f60f6fcbe..6d8ab1c4efc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -182,7 +182,7 @@ static void tcp_incr_quickack(struct sock *sk)
         icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }
 
-void tcp_enter_quickack_mode(struct sock *sk)
+static void tcp_enter_quickack_mode(struct sock *sk)
 {
     struct inet_connection_sock *icsk = inet_csk(sk);
     tcp_incr_quickack(sk);
@@ -259,8 +259,11 @@ static void tcp_fixup_sndbuf(struct sock *sk)
     int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
              sizeof(struct sk_buff);
 
-    if (sk->sk_sndbuf < 3 * sndmem)
-        sk->sk_sndbuf = min(3 * sndmem, sysctl_tcp_wmem[2]);
+    if (sk->sk_sndbuf < 3 * sndmem) {
+        sk->sk_sndbuf = 3 * sndmem;
+        if (sk->sk_sndbuf > sysctl_tcp_wmem[2])
+            sk->sk_sndbuf = sysctl_tcp_wmem[2];
+    }
 }
 
 /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -396,7 +399,7 @@ static void tcp_clamp_window(struct sock *sk)
     if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
         !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
         !tcp_memory_pressure &&
-        atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+        atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
         sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
                     sysctl_tcp_rmem[2]);
     }
@@ -428,10 +431,10 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
  *
  * The algorithm for RTT estimation w/o timestamps is based on
  * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL.
- * <http://www.lanl.gov/radiant/website/pubs/drs/lacsi2001.ps>
+ * <http://public.lanl.gov/radiant/pubs.html#DRS>
  *
  * More detail on this code can be found at
- * <http://www.psc.edu/~jheffner/senior_thesis.ps>,
+ * <http://staff.psc.edu/jheffner/>,
  * though this reference is out of date.  A new paper
  * is pending.
  */
@@ -805,25 +808,12 @@ void tcp_update_metrics(struct sock *sk)
     }
 }
 
-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- *  The RFC specifies a window of no more than 4380 bytes
- *  unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *  is a bit misleading because they use a clamp at 4380 bytes
- *  rather than use a multiplier in the relevant range.
- */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
     __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
-    if (!cwnd) {
-        if (tp->mss_cache > 1460)
-            cwnd = 2;
-        else
-            cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-    }
+    if (!cwnd)
+        cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
     return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
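This hunk (and the tcp_select_initial_window() one in tcp_output.c below) replaces the open-coded 1095/1460 thresholds with rfc3390_bytes_to_packets(). Its body is not part of this section; a plausible sketch, derived from the RFC 3390 / RFC 5681 rule IW = min(4*SMSS, max(2*SMSS, 4380 bytes)) that the deleted comment paraphrased:

    static inline u32 rfc3390_bytes_to_packets(const u32 smss)
    {
        /* 4380/4 = 1095: up to this SMSS, four segments fit in 4380 bytes;
         * above 4380/2 = 2190 the 2*SMSS term dominates, giving two.
         */
        return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
    }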
@@ -2314,7 +2304,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
 
 static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 {
-    return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
+    return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }
 
 static inline int tcp_head_timedout(struct sock *sk)
@@ -2508,7 +2498,7 @@ static void tcp_timeout_skbs(struct sock *sk)
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
     struct tcp_sock *tp = tcp_sk(sk);
     struct sk_buff *skb;
@@ -2516,13 +2506,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
     int err;
     unsigned int mss;
 
-    if (packets == 0)
-        return;
-
     WARN_ON(packets > tp->packets_out);
     if (tp->lost_skb_hint) {
         skb = tp->lost_skb_hint;
         cnt = tp->lost_cnt_hint;
+        /* Head already handled? */
+        if (mark_head && skb != tcp_write_queue_head(sk))
+            return;
     } else {
         skb = tcp_write_queue_head(sk);
         cnt = 0;
@@ -2557,6 +2547,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
         }
 
         tcp_skb_mark_lost(tp, skb);
+
+        if (mark_head)
+            break;
     }
     tcp_verify_left_out(tp);
 }
@@ -2568,17 +2561,18 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
     struct tcp_sock *tp = tcp_sk(sk);
 
     if (tcp_is_reno(tp)) {
-        tcp_mark_head_lost(sk, 1);
+        tcp_mark_head_lost(sk, 1, 1);
     } else if (tcp_is_fack(tp)) {
         int lost = tp->fackets_out - tp->reordering;
         if (lost <= 0)
             lost = 1;
-        tcp_mark_head_lost(sk, lost);
+        tcp_mark_head_lost(sk, lost, 0);
     } else {
         int sacked_upto = tp->sacked_out - tp->reordering;
-        if (sacked_upto < fast_rexmit)
-            sacked_upto = fast_rexmit;
-        tcp_mark_head_lost(sk, sacked_upto);
+        if (sacked_upto >= 0)
+            tcp_mark_head_lost(sk, sacked_upto, 0);
+        else if (fast_rexmit)
+            tcp_mark_head_lost(sk, 1, 1);
     }
 
     tcp_timeout_skbs(sk);
@@ -2887,7 +2881,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
                icsk->icsk_mtup.probe_size;
     tp->snd_cwnd_cnt = 0;
     tp->snd_cwnd_stamp = tcp_time_stamp;
-    tp->rcv_ssthresh = tcp_current_ssthresh(sk);
+    tp->snd_ssthresh = tcp_current_ssthresh(sk);
 
     icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
     icsk->icsk_mtup.probe_size = 0;
@@ -2984,7 +2978,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
             before(tp->snd_una, tp->high_seq) &&
             icsk->icsk_ca_state != TCP_CA_Open &&
             tp->fackets_out > tp->reordering) {
-            tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+            tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
             NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
         }
 
@@ -3412,8 +3406,8 @@ static void tcp_ack_probe(struct sock *sk)
 
 static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
 {
-    return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
-        inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
+    return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
+        inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }
 
 static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
@@ -3430,9 +3424,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
                     const u32 ack, const u32 ack_seq,
                     const u32 nwin)
 {
-    return (after(ack, tp->snd_una) ||
+    return after(ack, tp->snd_una) ||
         after(ack_seq, tp->snd_wl1) ||
-        (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
+        (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
 /* Update our send window.
@@ -4870,7 +4864,7 @@ static int tcp_should_expand_sndbuf(struct sock *sk)
         return 0;
 
     /* If we are under soft global TCP memory pressure, do not expand.  */
-    if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+    if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
         return 0;
 
     /* If we filled the congestion window, do not expand.  */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 020766292bb0..69ccbc1dde9c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -415,6 +415,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
             !icsk->icsk_backoff)
             break;
 
+        if (sock_owned_by_user(sk))
+            break;
+
         icsk->icsk_backoff--;
         inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
                      icsk->icsk_backoff;
@@ -429,11 +432,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
         if (remaining) {
             inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                           remaining, TCP_RTO_MAX);
-        } else if (sock_owned_by_user(sk)) {
-            /* RTO revert clocked out retransmission,
-             * but socket is locked. Will defer. */
-            inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-                          HZ/20, TCP_RTO_MAX);
         } else {
             /* RTO revert clocked out retransmission.
              * Will retransmit now */
@@ -1422,7 +1420,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
     newsk = tcp_create_openreq_child(sk, req, skb);
     if (!newsk)
-        goto exit;
+        goto exit_nonewsk;
 
     newsk->sk_gso_type = SKB_GSO_TCPV4;
     sk_setup_caps(newsk, dst);
@@ -1469,16 +1467,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
     }
 #endif
 
+    if (__inet_inherit_port(sk, newsk) < 0) {
+        sock_put(newsk);
+        goto exit;
+    }
     __inet_hash_nolisten(newsk, NULL);
-    __inet_inherit_port(sk, newsk);
 
     return newsk;
 
 exit_overflow:
     NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+exit_nonewsk:
+    dst_release(dst);
 exit:
     NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
-    dst_release(dst);
     return NULL;
 }
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
@@ -2571,7 +2573,6 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
     return tcp_gro_receive(head, skb);
 }
-EXPORT_SYMBOL(tcp4_gro_receive);
 
 int tcp4_gro_complete(struct sk_buff *skb)
 {
@@ -2584,7 +2585,6 @@ int tcp4_gro_complete(struct sk_buff *skb)
 
     return tcp_gro_complete(skb);
 }
-EXPORT_SYMBOL(tcp4_gro_complete);
 
 struct proto tcp_prot = {
     .name           = "TCP",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f25b56cb85cb..43cf901d7659 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -55,7 +55,7 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
         return 1;
     if (after(end_seq, s_win) && before(seq, e_win))
         return 1;
-    return (seq == e_win && seq == end_seq);
+    return seq == e_win && seq == end_seq;
 }
 
 /*
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd8458588..05b1ecf36763 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -224,16 +224,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
         }
     }
 
-    /* Set initial window to value enough for senders,
-     * following RFC2414. Senders, not following this RFC,
-     * will be satisfied with 2.
-     */
+    /* Set initial window to value enough for senders, following RFC5681. */
     if (mss > (1 << *rcv_wscale)) {
-        int init_cwnd = 4;
-        if (mss > 1460 * 3)
-            init_cwnd = 2;
-        else if (mss > 1460)
-            init_cwnd = 3;
+        int init_cwnd = rfc3390_bytes_to_packets(mss);
+
         /* when initializing use the value from init_rcv_wnd
          * rather than the default from above
          */
@@ -1376,9 +1370,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
                   const struct sk_buff *skb,
                   unsigned mss_now, int nonagle)
 {
-    return (skb->len < mss_now &&
+    return skb->len < mss_now &&
         ((nonagle & TCP_NAGLE_CORK) ||
-         (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
+         (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
 }
 
 /* Return non-zero if the Nagle test allows this packet to be
@@ -1449,10 +1443,10 @@ int tcp_may_send_now(struct sock *sk)
     struct tcp_sock *tp = tcp_sk(sk);
     struct sk_buff *skb = tcp_send_head(sk);
 
-    return (skb &&
+    return skb &&
         tcp_snd_test(sk, skb, tcp_current_mss(sk),
                  (tcp_skb_is_last(sk, skb) ?
-                  tp->nonagle : TCP_NAGLE_PUSH)));
+                  tp->nonagle : TCP_NAGLE_PUSH));
 }
 
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -2429,6 +2423,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
         __u8 rcv_wscale;
         /* Set this up on the first call only */
         req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+        /* limit the window selection if the user enforce a smaller rx buffer */
+        if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+            (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+            req->window_clamp = tcp_full_space(sk);
+
         /* tcp_full_space because it is guaranteed to be the first packet */
         tcp_select_initial_window(tcp_full_space(sk),
             mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2555,6 +2555,11 @@ static void tcp_connect_init(struct sock *sk)
 
     tcp_initialize_rcv_mss(sk);
 
+    /* limit the window selection if the user enforce a smaller rx buffer */
+    if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+        (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
+        tp->window_clamp = tcp_full_space(sk);
+
     tcp_select_initial_window(tcp_full_space(sk),
         tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
         &tp->rcv_wnd,
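The two window_clamp hunks above make a user-locked receive buffer bound the clamp for both passive (tcp_make_synack) and active (tcp_connect_init) connections. A hedged userspace illustration; SOCK_RCVBUF_LOCK is set as a side effect of SO_RCVBUF, and the kernel doubles the supplied value internally:

    #include <sys/socket.h>

    /* Shrinking the receive buffer before connect()/listen() should now
     * also shrink the window the stack is willing to advertise.
     */
    static int lock_rcvbuf(int fd, int bytes)
    {
        return setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bytes, sizeof(bytes));
    }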
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f8efada580e8..6211e2114173 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -214,6 +214,7 @@ static const struct file_operations tcpprobe_fops = {
     .owner   = THIS_MODULE,
     .open    = tcpprobe_open,
     .read    = tcpprobe_read,
+    .llseek  = noop_llseek,
 };
 
 static __init int tcpprobe_init(void)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74c54b30600f..74a6aa003657 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -140,10 +140,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
  */
 static bool retransmits_timed_out(struct sock *sk,
                   unsigned int boundary,
+                  unsigned int timeout,
                   bool syn_set)
 {
-    unsigned int timeout, linear_backoff_thresh;
-    unsigned int start_ts;
+    unsigned int linear_backoff_thresh, start_ts;
     unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
 
     if (!inet_csk(sk)->icsk_retransmits)
@@ -154,14 +154,15 @@ static bool retransmits_timed_out(struct sock *sk,
     else
         start_ts = tcp_sk(sk)->retrans_stamp;
 
-    linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
+    if (likely(timeout == 0)) {
+        linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
 
-    if (boundary <= linear_backoff_thresh)
-        timeout = ((2 << boundary) - 1) * rto_base;
-    else
-        timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
-              (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
-
+        if (boundary <= linear_backoff_thresh)
+            timeout = ((2 << boundary) - 1) * rto_base;
+        else
+            timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
+                  (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+    }
     return (tcp_time_stamp - start_ts) >= timeout;
 }
 
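For reference, the timeout computed above is the closed form of an RTO schedule that doubles from rto_base up to TCP_RTO_MAX and then grows linearly. A standalone restatement of the same arithmetic (ilog2() open-coded so the sketch has no kernel dependencies):

    static unsigned int retrans_deadline(unsigned int boundary,
                                         unsigned int rto_base,
                                         unsigned int rto_max)
    {
        unsigned int thresh = 0, v = rto_max / rto_base;

        while (v >>= 1)
            thresh++;                   /* ilog2(rto_max / rto_base) */

        if (boundary <= thresh)
            return ((2 << boundary) - 1) * rto_base;    /* pure doubling */
        return ((2 << thresh) - 1) * rto_base +
               (boundary - thresh) * rto_max;           /* linear tail */
    }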
@@ -178,7 +179,7 @@ static int tcp_write_timeout(struct sock *sk)
         retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
         syn_set = 1;
     } else {
-        if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) {
+        if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
             /* Black hole detection */
             tcp_mtu_probing(icsk, sk);
 
@@ -191,14 +192,15 @@ static int tcp_write_timeout(struct sock *sk)
 
             retry_until = tcp_orphan_retries(sk, alive);
             do_reset = alive ||
-                   !retransmits_timed_out(sk, retry_until, 0);
+                   !retransmits_timed_out(sk, retry_until, 0, 0);
 
             if (tcp_out_of_resources(sk, do_reset))
                 return 1;
         }
     }
 
-    if (retransmits_timed_out(sk, retry_until, syn_set)) {
+    if (retransmits_timed_out(sk, retry_until,
+                  syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
         /* Has it gone just too far? */
         tcp_write_err(sk);
         return 1;
@@ -365,18 +367,19 @@ void tcp_retransmit_timer(struct sock *sk)
     if (icsk->icsk_retransmits == 0) {
         int mib_idx;
 
-        if (icsk->icsk_ca_state == TCP_CA_Disorder) {
-            if (tcp_is_sack(tp))
-                mib_idx = LINUX_MIB_TCPSACKFAILURES;
-            else
-                mib_idx = LINUX_MIB_TCPRENOFAILURES;
-        } else if (icsk->icsk_ca_state == TCP_CA_Recovery) {
+        if (icsk->icsk_ca_state == TCP_CA_Recovery) {
             if (tcp_is_sack(tp))
                 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
             else
                 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
         } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
             mib_idx = LINUX_MIB_TCPLOSSFAILURES;
+        } else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
+               tp->sacked_out) {
+            if (tcp_is_sack(tp))
+                mib_idx = LINUX_MIB_TCPSACKFAILURES;
+            else
+                mib_idx = LINUX_MIB_TCPRENOFAILURES;
         } else {
             mib_idx = LINUX_MIB_TCPTIMEOUTS;
         }
@@ -440,7 +443,7 @@ out_reset_timer:
         icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
     }
     inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
-    if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0))
+    if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
         __sk_dst_reset(sk);
 
 out:;
@@ -560,7 +563,14 @@ static void tcp_keepalive_timer (unsigned long data)
     elapsed = keepalive_time_elapsed(tp);
 
     if (elapsed >= keepalive_time_when(tp)) {
-        if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
+        /* If the TCP_USER_TIMEOUT option is enabled, use that
+         * to determine when to timeout instead.
+         */
+        if ((icsk->icsk_user_timeout != 0 &&
+            elapsed >= icsk->icsk_user_timeout &&
+            icsk->icsk_probes_out > 0) ||
+            (icsk->icsk_user_timeout == 0 &&
+            icsk->icsk_probes_out >= keepalive_probes(tp))) {
             tcp_send_active_reset(sk, GFP_ATOMIC);
             tcp_write_err(sk);
             goto out;
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index b612acf76183..38bc0b52d745 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -6,7 +6,7 @@
  *    "TCP Veno: TCP Enhancement for Transmission over Wireless Access Networks."
  *    IEEE Journal on Selected Areas in Communication,
  *    Feb. 2003.
- *    See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
+ *    See http://www.ie.cuhk.edu.hk/fileadmin/staff_upload/soung/Journal/J3.pdf
  */
 
 #include <linux/mm.h>
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 20151d6a6241..a534dda5456e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -80,7 +80,7 @@ static void tcp_westwood_init(struct sock *sk)
80 */ 80 */
81static inline u32 westwood_do_filter(u32 a, u32 b) 81static inline u32 westwood_do_filter(u32 a, u32 b)
82{ 82{
83 return (((7 * a) + b) >> 3); 83 return ((7 * a) + b) >> 3;
84} 84}
85 85
86static void westwood_filter(struct westwood *w, u32 delta) 86static void westwood_filter(struct westwood *w, u32 delta)
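
The westwood_do_filter() change above only drops redundant parentheses; the function remains the usual fixed-point low-pass filter, new = (7 * old + sample) / 8, used to smooth bandwidth samples. A self-contained sketch of the same arithmetic (the sample values are invented):

#include <stdio.h>
#include <stdint.h>

/* Same fixed-point 7/8 EWMA as westwood_do_filter() */
static inline uint32_t ewma_7_8(uint32_t avg, uint32_t sample)
{
	return ((7 * avg) + sample) >> 3;
}

int main(void)
{
	uint32_t bw = 1000;		/* arbitrary starting estimate */
	uint32_t samples[] = { 1200, 1400, 900 };

	for (unsigned int i = 0; i < 3; i++) {
		bw = ewma_7_8(bw, samples[i]);
		printf("sample %u -> estimate %u\n",
		       (unsigned int)samples[i], (unsigned int)bw);
	}
	return 0;
}
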
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 59186ca7808a..ac3b3ee4b07c 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,32 +14,37 @@
14#include <net/protocol.h> 14#include <net/protocol.h>
15#include <net/xfrm.h> 15#include <net/xfrm.h>
16 16
17static struct xfrm_tunnel *tunnel4_handlers; 17static struct xfrm_tunnel __rcu *tunnel4_handlers __read_mostly;
18static struct xfrm_tunnel *tunnel64_handlers; 18static struct xfrm_tunnel __rcu *tunnel64_handlers __read_mostly;
19static DEFINE_MUTEX(tunnel4_mutex); 19static DEFINE_MUTEX(tunnel4_mutex);
20 20
21static inline struct xfrm_tunnel **fam_handlers(unsigned short family) 21static inline struct xfrm_tunnel __rcu **fam_handlers(unsigned short family)
22{ 22{
23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers; 23 return (family == AF_INET) ? &tunnel4_handlers : &tunnel64_handlers;
24} 24}
25 25
26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family) 26int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
27{ 27{
28 struct xfrm_tunnel **pprev; 28 struct xfrm_tunnel __rcu **pprev;
29 struct xfrm_tunnel *t;
30
29 int ret = -EEXIST; 31 int ret = -EEXIST;
30 int priority = handler->priority; 32 int priority = handler->priority;
31 33
32 mutex_lock(&tunnel4_mutex); 34 mutex_lock(&tunnel4_mutex);
33 35
34 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 36 for (pprev = fam_handlers(family);
35 if ((*pprev)->priority > priority) 37 (t = rcu_dereference_protected(*pprev,
38 lockdep_is_held(&tunnel4_mutex))) != NULL;
39 pprev = &t->next) {
40 if (t->priority > priority)
36 break; 41 break;
37 if ((*pprev)->priority == priority) 42 if (t->priority == priority)
38 goto err; 43 goto err;
39 } 44 }
40 45
41 handler->next = *pprev; 46 handler->next = *pprev;
42 *pprev = handler; 47 rcu_assign_pointer(*pprev, handler);
43 48
44 ret = 0; 49 ret = 0;
45 50
@@ -52,13 +57,17 @@ EXPORT_SYMBOL(xfrm4_tunnel_register);
52 57
53int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) 58int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
54{ 59{
55 struct xfrm_tunnel **pprev; 60 struct xfrm_tunnel __rcu **pprev;
61 struct xfrm_tunnel *t;
56 int ret = -ENOENT; 62 int ret = -ENOENT;
57 63
58 mutex_lock(&tunnel4_mutex); 64 mutex_lock(&tunnel4_mutex);
59 65
60 for (pprev = fam_handlers(family); *pprev; pprev = &(*pprev)->next) { 66 for (pprev = fam_handlers(family);
61 if (*pprev == handler) { 67 (t = rcu_dereference_protected(*pprev,
68 lockdep_is_held(&tunnel4_mutex))) != NULL;
69 pprev = &t->next) {
70 if (t == handler) {
62 *pprev = handler->next; 71 *pprev = handler->next;
63 ret = 0; 72 ret = 0;
64 break; 73 break;
@@ -73,6 +82,11 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
73} 82}
74EXPORT_SYMBOL(xfrm4_tunnel_deregister); 83EXPORT_SYMBOL(xfrm4_tunnel_deregister);
75 84
85#define for_each_tunnel_rcu(head, handler) \
86 for (handler = rcu_dereference(head); \
87 handler != NULL; \
88 handler = rcu_dereference(handler->next)) \
89
76static int tunnel4_rcv(struct sk_buff *skb) 90static int tunnel4_rcv(struct sk_buff *skb)
77{ 91{
78 struct xfrm_tunnel *handler; 92 struct xfrm_tunnel *handler;
@@ -80,7 +94,7 @@ static int tunnel4_rcv(struct sk_buff *skb)
80 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 94 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
81 goto drop; 95 goto drop;
82 96
83 for (handler = tunnel4_handlers; handler; handler = handler->next) 97 for_each_tunnel_rcu(tunnel4_handlers, handler)
84 if (!handler->handler(skb)) 98 if (!handler->handler(skb))
85 return 0; 99 return 0;
86 100
@@ -99,7 +113,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
99 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 113 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
100 goto drop; 114 goto drop;
101 115
102 for (handler = tunnel64_handlers; handler; handler = handler->next) 116 for_each_tunnel_rcu(tunnel64_handlers, handler)
103 if (!handler->handler(skb)) 117 if (!handler->handler(skb))
104 return 0; 118 return 0;
105 119
@@ -115,7 +129,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
115{ 129{
116 struct xfrm_tunnel *handler; 130 struct xfrm_tunnel *handler;
117 131
118 for (handler = tunnel4_handlers; handler; handler = handler->next) 132 for_each_tunnel_rcu(tunnel4_handlers, handler)
119 if (!handler->err_handler(skb, info)) 133 if (!handler->err_handler(skb, info))
120 break; 134 break;
121} 135}
@@ -125,7 +139,7 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
125{ 139{
126 struct xfrm_tunnel *handler; 140 struct xfrm_tunnel *handler;
127 141
128 for (handler = tunnel64_handlers; handler; handler = handler->next) 142 for_each_tunnel_rcu(tunnel64_handlers, handler)
129 if (!handler->err_handler(skb, info)) 143 if (!handler->err_handler(skb, info))
130 break; 144 break;
131} 145}
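
The tunnel4 conversion above is the standard RCU pattern: registration and deregistration serialize on tunnel4_mutex and use rcu_dereference_protected()/rcu_assign_pointer(), while the packet-path walkers run locklessly via for_each_tunnel_rcu() under the caller's rcu_read_lock(). As a rough userspace analogue (not the kernel API: grace periods and reclamation are omitted, and the priority-ordered insert is simplified to a head insert), C11 atomics can stand in for the publish and read barriers:

#include <stdatomic.h>
#include <stddef.h>

struct handler {
	int (*fn)(void *pkt);
	struct handler *_Atomic next;
};

static struct handler *_Atomic handlers;

/* Writer side: the caller holds the registration mutex, so reading
 * the list is safe; the release store publishes the fully built node,
 * playing the role of rcu_assign_pointer().
 */
static void publish_head(struct handler *h)
{
	h->next = atomic_load_explicit(&handlers, memory_order_relaxed);
	atomic_store_explicit(&handlers, h, memory_order_release);
}

/* Reader side: lockless walk, as for_each_tunnel_rcu() does; the
 * acquire loads stand in for rcu_dereference().
 */
static int dispatch(void *pkt)
{
	struct handler *h;

	for (h = atomic_load_explicit(&handlers, memory_order_acquire);
	     h != NULL;
	     h = atomic_load_explicit(&h->next, memory_order_acquire))
		if (!h->fn(pkt))
			return 0;
	return -1;	/* no handler claimed the packet */
}
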
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fb23c2e63b52..5e0a3a582a59 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,7 +110,7 @@
110struct udp_table udp_table __read_mostly; 110struct udp_table udp_table __read_mostly;
111EXPORT_SYMBOL(udp_table); 111EXPORT_SYMBOL(udp_table);
112 112
113int sysctl_udp_mem[3] __read_mostly; 113long sysctl_udp_mem[3] __read_mostly;
114EXPORT_SYMBOL(sysctl_udp_mem); 114EXPORT_SYMBOL(sysctl_udp_mem);
115 115
116int sysctl_udp_rmem_min __read_mostly; 116int sysctl_udp_rmem_min __read_mostly;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(sysctl_udp_rmem_min);
119int sysctl_udp_wmem_min __read_mostly; 119int sysctl_udp_wmem_min __read_mostly;
120EXPORT_SYMBOL(sysctl_udp_wmem_min); 120EXPORT_SYMBOL(sysctl_udp_wmem_min);
121 121
122atomic_t udp_memory_allocated; 122atomic_long_t udp_memory_allocated;
123EXPORT_SYMBOL(udp_memory_allocated); 123EXPORT_SYMBOL(udp_memory_allocated);
124 124
125#define MAX_UDP_PORTS 65536 125#define MAX_UDP_PORTS 65536
@@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
797 return -EOPNOTSUPP; 797 return -EOPNOTSUPP;
798 798
799 ipc.opt = NULL; 799 ipc.opt = NULL;
800 ipc.shtx.flags = 0; 800 ipc.tx_flags = 0;
801 801
802 if (up->pending) { 802 if (up->pending) {
803 /* 803 /*
@@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
845 ipc.addr = inet->inet_saddr; 845 ipc.addr = inet->inet_saddr;
846 846
847 ipc.oif = sk->sk_bound_dev_if; 847 ipc.oif = sk->sk_bound_dev_if;
848 err = sock_tx_timestamp(msg, sk, &ipc.shtx); 848 err = sock_tx_timestamp(sk, &ipc.tx_flags);
849 if (err) 849 if (err)
850 return err; 850 return err;
851 if (msg->msg_controllen) { 851 if (msg->msg_controllen) {
@@ -1413,7 +1413,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1413 } 1413 }
1414 } 1414 }
1415 1415
1416 if (sk->sk_filter) { 1416 if (rcu_dereference_raw(sk->sk_filter)) {
1417 if (udp_lib_checksum_complete(skb)) 1417 if (udp_lib_checksum_complete(skb))
1418 goto drop; 1418 goto drop;
1419 } 1419 }
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a580349f0b8a..4464f3bff6a7 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -174,7 +174,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)
174 struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); 174 struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
175 175
176 xfrm4_policy_afinfo.garbage_collect(net); 176 xfrm4_policy_afinfo.garbage_collect(net);
177 return (atomic_read(&ops->entries) > ops->gc_thresh * 2); 177 return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
178} 178}
179 179
180static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) 180static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -232,7 +232,6 @@ static struct dst_ops xfrm4_dst_ops = {
232 .ifdown = xfrm4_dst_ifdown, 232 .ifdown = xfrm4_dst_ifdown,
233 .local_out = __ip_local_out, 233 .local_out = __ip_local_out,
234 .gc_thresh = 1024, 234 .gc_thresh = 1024,
235 .entries = ATOMIC_INIT(0),
236}; 235};
237 236
238static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { 237static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
@@ -288,6 +287,7 @@ void __init xfrm4_init(int rt_max_size)
 288 * and start cleaning when we're 1/2 full 287 * and start cleaning when we're 1/2 full
289 */ 288 */
290 xfrm4_dst_ops.gc_thresh = rt_max_size/2; 289 xfrm4_dst_ops.gc_thresh = rt_max_size/2;
290 dst_entries_init(&xfrm4_dst_ops);
291 291
292 xfrm4_state_init(); 292 xfrm4_state_init();
293 xfrm4_policy_init(); 293 xfrm4_policy_init();
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 41f5982d2087..82806455e859 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -58,14 +58,14 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
58 return -ENOENT; 58 return -ENOENT;
59} 59}
60 60
61static struct xfrm_tunnel xfrm_tunnel_handler = { 61static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
62 .handler = xfrm_tunnel_rcv, 62 .handler = xfrm_tunnel_rcv,
63 .err_handler = xfrm_tunnel_err, 63 .err_handler = xfrm_tunnel_err,
64 .priority = 2, 64 .priority = 2,
65}; 65};
66 66
67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 67#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
68static struct xfrm_tunnel xfrm64_tunnel_handler = { 68static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
69 .handler = xfrm_tunnel_rcv, 69 .handler = xfrm_tunnel_rcv,
70 .err_handler = xfrm_tunnel_err, 70 .err_handler = xfrm_tunnel_err,
71 .priority = 2, 71 .priority = 2,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 324fac3b6c16..b41ce0f0d514 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -243,7 +243,7 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev)
243/* Check if a route is valid prefix route */ 243/* Check if a route is valid prefix route */
244static inline int addrconf_is_prefix_route(const struct rt6_info *rt) 244static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
245{ 245{
246 return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0); 246 return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
247} 247}
248 248
249static void addrconf_del_timer(struct inet6_ifaddr *ifp) 249static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -836,7 +836,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
836{ 836{
837 struct inet6_dev *idev = ifp->idev; 837 struct inet6_dev *idev = ifp->idev;
838 struct in6_addr addr, *tmpaddr; 838 struct in6_addr addr, *tmpaddr;
839 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp; 839 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age;
840 unsigned long regen_advance; 840 unsigned long regen_advance;
841 int tmp_plen; 841 int tmp_plen;
842 int ret = 0; 842 int ret = 0;
@@ -886,12 +886,13 @@ retry:
886 goto out; 886 goto out;
887 } 887 }
888 memcpy(&addr.s6_addr[8], idev->rndid, 8); 888 memcpy(&addr.s6_addr[8], idev->rndid, 8);
889 age = (jiffies - ifp->tstamp) / HZ;
889 tmp_valid_lft = min_t(__u32, 890 tmp_valid_lft = min_t(__u32,
890 ifp->valid_lft, 891 ifp->valid_lft,
891 idev->cnf.temp_valid_lft); 892 idev->cnf.temp_valid_lft + age);
892 tmp_prefered_lft = min_t(__u32, 893 tmp_prefered_lft = min_t(__u32,
893 ifp->prefered_lft, 894 ifp->prefered_lft,
894 idev->cnf.temp_prefered_lft - 895 idev->cnf.temp_prefered_lft + age -
895 idev->cnf.max_desync_factor); 896 idev->cnf.max_desync_factor);
896 tmp_plen = ifp->prefix_len; 897 tmp_plen = ifp->prefix_len;
897 max_addresses = idev->cnf.max_addresses; 898 max_addresses = idev->cnf.max_addresses;
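
The new age term matters because the temporary address inherits the public address's tstamp, so its lifetimes are counted from that older timestamp: the configured temp_valid_lft/temp_prefered_lft values, which are relative to "now", must be extended by the public address's age before the min_t() clamp. A small worked example with invented numbers:

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int age = 3600;		/* public address is 1h old */
	unsigned int pub_valid_lft = 86400;	/* counted from ifp->tstamp */
	unsigned int temp_valid_lft = 7200;	/* configured, relative to now */

	/* Lifetimes are counted from the inherited tstamp, so the clamp
	 * must use temp_valid_lft + age: 10800 from tstamp equals the
	 * intended 7200 from now.  Without "+ age" the temporary address
	 * would expire 3600 seconds early.
	 */
	printf("tmp_valid_lft = %u\n",
	       min_u32(pub_valid_lft, temp_valid_lft + age));
	return 0;
}
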
@@ -1426,8 +1427,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1426{ 1427{
1427 struct inet6_dev *idev = ifp->idev; 1428 struct inet6_dev *idev = ifp->idev;
1428 1429
1429 if (addrconf_dad_end(ifp)) 1430 if (addrconf_dad_end(ifp)) {
1431 in6_ifa_put(ifp);
1430 return; 1432 return;
1433 }
1431 1434
1432 if (net_ratelimit()) 1435 if (net_ratelimit())
1433 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n", 1436 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
@@ -1544,7 +1547,7 @@ static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
1544 return 0; 1547 return 0;
1545} 1548}
1546 1549
1547int __ipv6_isatap_ifid(u8 *eui, __be32 addr) 1550static int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
1548{ 1551{
1549 if (addr == 0) 1552 if (addr == 0)
1550 return -1; 1553 return -1;
@@ -1560,7 +1563,6 @@ int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
1560 memcpy(eui + 4, &addr, 4); 1563 memcpy(eui + 4, &addr, 4);
1561 return 0; 1564 return 0;
1562} 1565}
1563EXPORT_SYMBOL(__ipv6_isatap_ifid);
1564 1566
1565static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) 1567static int addrconf_ifid_sit(u8 *eui, struct net_device *dev)
1566{ 1568{
@@ -2022,10 +2024,11 @@ ok:
2022 ipv6_ifa_notify(0, ift); 2024 ipv6_ifa_notify(0, ift);
2023 } 2025 }
2024 2026
2025 if (create && in6_dev->cnf.use_tempaddr > 0) { 2027 if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
2026 /* 2028 /*
2027 * When a new public address is created as described in [ADDRCONF], 2029 * When a new public address is created as described in [ADDRCONF],
2028 * also create a new temporary address. 2030 * also create a new temporary address. Also create a temporary
2031 * address if it's enabled but no temporary address currently exists.
2029 */ 2032 */
2030 read_unlock_bh(&in6_dev->lock); 2033 read_unlock_bh(&in6_dev->lock);
2031 ipv6_create_tempaddr(ifp, NULL); 2034 ipv6_create_tempaddr(ifp, NULL);
@@ -2737,10 +2740,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2737 /* Flag it for later restoration when link comes up */ 2740 /* Flag it for later restoration when link comes up */
2738 ifa->flags |= IFA_F_TENTATIVE; 2741 ifa->flags |= IFA_F_TENTATIVE;
2739 ifa->state = INET6_IFADDR_STATE_DAD; 2742 ifa->state = INET6_IFADDR_STATE_DAD;
2740
2741 write_unlock_bh(&idev->lock);
2742
2743 in6_ifa_hold(ifa);
2744 } else { 2743 } else {
2745 list_del(&ifa->if_list); 2744 list_del(&ifa->if_list);
2746 2745
@@ -2755,19 +2754,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2755 ifa->state = INET6_IFADDR_STATE_DEAD; 2754 ifa->state = INET6_IFADDR_STATE_DEAD;
2756 spin_unlock_bh(&ifa->state_lock); 2755 spin_unlock_bh(&ifa->state_lock);
2757 2756
2758 if (state == INET6_IFADDR_STATE_DEAD) 2757 if (state == INET6_IFADDR_STATE_DEAD) {
2759 goto put_ifa; 2758 in6_ifa_put(ifa);
2759 } else {
2760 __ipv6_ifa_notify(RTM_DELADDR, ifa);
2761 atomic_notifier_call_chain(&inet6addr_chain,
2762 NETDEV_DOWN, ifa);
2763 }
2764 write_lock_bh(&idev->lock);
2760 } 2765 }
2761
2762 __ipv6_ifa_notify(RTM_DELADDR, ifa);
2763 if (ifa->state == INET6_IFADDR_STATE_DEAD)
2764 atomic_notifier_call_chain(&inet6addr_chain,
2765 NETDEV_DOWN, ifa);
2766
2767put_ifa:
2768 in6_ifa_put(ifa);
2769
2770 write_lock_bh(&idev->lock);
2771 } 2766 }
2772 2767
2773 list_splice(&keep_list, &idev->addr_list); 2768 list_splice(&keep_list, &idev->addr_list);
@@ -2964,7 +2959,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2964 start sending router solicitations. 2959 start sending router solicitations.
2965 */ 2960 */
2966 2961
2967 if (ifp->idev->cnf.forwarding == 0 && 2962 if ((ifp->idev->cnf.forwarding == 0 ||
2963 ifp->idev->cnf.forwarding == 2) &&
2968 ifp->idev->cnf.rtr_solicits > 0 && 2964 ifp->idev->cnf.rtr_solicits > 0 &&
2969 (dev->flags&IFF_LOOPBACK) == 0 && 2965 (dev->flags&IFF_LOOPBACK) == 0 &&
2970 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { 2966 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8175f802651b..c8993e5a337c 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -518,10 +518,9 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
518 518
519static inline int ip6addrlbl_msgsize(void) 519static inline int ip6addrlbl_msgsize(void)
520{ 520{
521 return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) 521 return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
522 + nla_total_size(16) /* IFAL_ADDRESS */ 522 + nla_total_size(16) /* IFAL_ADDRESS */
523 + nla_total_size(4) /* IFAL_LABEL */ 523 + nla_total_size(4); /* IFAL_LABEL */
524 );
525} 524}
526 525
527static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, 526static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 56b9bf2516f4..54e8e42f7a88 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -343,7 +343,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
343 */ 343 */
344 v4addr = LOOPBACK4_IPV6; 344 v4addr = LOOPBACK4_IPV6;
345 if (!(addr_type & IPV6_ADDR_MULTICAST)) { 345 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
346 if (!ipv6_chk_addr(net, &addr->sin6_addr, 346 if (!inet->transparent &&
347 !ipv6_chk_addr(net, &addr->sin6_addr,
347 dev, 0)) { 348 dev, 0)) {
348 err = -EADDRNOTAVAIL; 349 err = -EADDRNOTAVAIL;
349 goto out_unlock; 350 goto out_unlock;
@@ -467,7 +468,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
467 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) 468 if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
468 sin->sin6_scope_id = sk->sk_bound_dev_if; 469 sin->sin6_scope_id = sk->sk_bound_dev_if;
469 *uaddr_len = sizeof(*sin); 470 *uaddr_len = sizeof(*sin);
470 return(0); 471 return 0;
471} 472}
472 473
473EXPORT_SYMBOL(inet6_getname); 474EXPORT_SYMBOL(inet6_getname);
@@ -488,7 +489,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
488 case SIOCADDRT: 489 case SIOCADDRT:
489 case SIOCDELRT: 490 case SIOCDELRT:
490 491
491 return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); 492 return ipv6_route_ioctl(net, cmd, (void __user *)arg);
492 493
493 case SIOCSIFADDR: 494 case SIOCSIFADDR:
494 return addrconf_add_ifaddr(net, (void __user *) arg); 495 return addrconf_add_ifaddr(net, (void __user *) arg);
@@ -502,7 +503,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
502 return sk->sk_prot->ioctl(sk, cmd, arg); 503 return sk->sk_prot->ioctl(sk, cmd, arg);
503 } 504 }
504 /*NOTREACHED*/ 505 /*NOTREACHED*/
505 return(0); 506 return 0;
506} 507}
507 508
508EXPORT_SYMBOL(inet6_ioctl); 509EXPORT_SYMBOL(inet6_ioctl);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ef371aa01ac5..320bdb877eed 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -577,6 +577,25 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
577 u8 *ptr = nh + opt->dst1; 577 u8 *ptr = nh + opt->dst1;
578 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); 578 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
579 } 579 }
580 if (np->rxopt.bits.rxorigdstaddr) {
581 struct sockaddr_in6 sin6;
582 u16 *ports = (u16 *) skb_transport_header(skb);
583
584 if (skb_transport_offset(skb) + 4 <= skb->len) {
585 /* All current transport protocols have the port numbers in the
 586 * first four bytes of the transport header, and this function is
587 * written with this assumption in mind.
588 */
589
590 sin6.sin6_family = AF_INET6;
591 ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
592 sin6.sin6_port = ports[1];
593 sin6.sin6_flowinfo = 0;
594 sin6.sin6_scope_id = 0;
595
596 put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
597 }
598 }
580 return 0; 599 return 0;
581} 600}
582 601
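
On the receive side, a socket that sets IPV6_RECVORIGDSTADDR gets the block above executed for each datagram and can read the original destination from an IPV6_ORIGDSTADDR control message, which is what transparent UDP proxies need. A hedged sketch of the userspace consumer; the fallback #defines are for headers predating this series (on Linux both constants are 74):

#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#ifndef IPV6_RECVORIGDSTADDR
#define IPV6_RECVORIGDSTADDR 74
#endif
#ifndef IPV6_ORIGDSTADDR
#define IPV6_ORIGDSTADDR 74	/* same value; one name per direction */
#endif

/* Receive one datagram and recover its original destination address.
 * Returns 0 and fills *orig_dst when the ancillary data was present.
 */
static int recv_with_orig_dst(int fd, void *buf, size_t len,
			      struct sockaddr_in6 *orig_dst)
{
	char cbuf[CMSG_SPACE(sizeof(struct sockaddr_in6))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int on = 1;

	/* normally done once at socket setup */
	setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &on, sizeof(on));

	if (recvmsg(fd, &msg, 0) < 0)
		return -1;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_IPV6 &&
		    cmsg->cmsg_type == IPV6_ORIGDSTADDR) {
			memcpy(orig_dst, CMSG_DATA(cmsg), sizeof(*orig_dst));
			return 0;
		}
	}
	return -1;
}
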
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index e1caa5d526c2..14ed0a955b56 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -13,12 +13,12 @@ int ipv6_ext_hdr(u8 nexthdr)
13 /* 13 /*
14 * find out if nexthdr is an extension header or a protocol 14 * find out if nexthdr is an extension header or a protocol
15 */ 15 */
16 return ( (nexthdr == NEXTHDR_HOP) || 16 return (nexthdr == NEXTHDR_HOP) ||
17 (nexthdr == NEXTHDR_ROUTING) || 17 (nexthdr == NEXTHDR_ROUTING) ||
18 (nexthdr == NEXTHDR_FRAGMENT) || 18 (nexthdr == NEXTHDR_FRAGMENT) ||
19 (nexthdr == NEXTHDR_AUTH) || 19 (nexthdr == NEXTHDR_AUTH) ||
20 (nexthdr == NEXTHDR_NONE) || 20 (nexthdr == NEXTHDR_NONE) ||
21 (nexthdr == NEXTHDR_DEST) ); 21 (nexthdr == NEXTHDR_DEST);
22} 22}
23 23
24/* 24/*
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b1108ede18e1..d829874d8946 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -34,11 +34,10 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
34{ 34{
35 struct fib_lookup_arg arg = { 35 struct fib_lookup_arg arg = {
36 .lookup_ptr = lookup, 36 .lookup_ptr = lookup,
37 .flags = FIB_LOOKUP_NOREF,
37 }; 38 };
38 39
39 fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); 40 fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg);
40 if (arg.rule)
41 fib_rule_put(arg.rule);
42 41
43 if (arg.result) 42 if (arg.result)
44 return arg.result; 43 return arg.result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b6a585909d35..de382114609b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1500,15 +1500,18 @@ static void fib6_gc_timer_cb(unsigned long arg)
1500 1500
1501static int __net_init fib6_net_init(struct net *net) 1501static int __net_init fib6_net_init(struct net *net)
1502{ 1502{
1503 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
1504
1503 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); 1505 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
1504 1506
1505 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); 1507 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
1506 if (!net->ipv6.rt6_stats) 1508 if (!net->ipv6.rt6_stats)
1507 goto out_timer; 1509 goto out_timer;
1508 1510
 1509 net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ, 1511 /* Avoid false sharing: use at least a full cache line */
1510 sizeof(*net->ipv6.fib_table_hash), 1512 size = max_t(size_t, size, L1_CACHE_BYTES);
1511 GFP_KERNEL); 1513
1514 net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
1512 if (!net->ipv6.fib_table_hash) 1515 if (!net->ipv6.fib_table_hash)
1513 goto out_rt6_stats; 1516 goto out_rt6_stats;
1514 1517
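
The kzalloc() rewrite above rounds the per-netns FIB table hash up to at least one cache line, so the small FIB6_TABLE_HASHSZ allocation does not share its line with unrelated hot data. The sizing rule, extracted into a standalone sketch (L1_CACHE_BYTES is hard-coded to a typical 64 here; in the kernel it is per-arch):

#include <stddef.h>

#define L1_CACHE_BYTES 64	/* typical x86 value; per-arch in the kernel */

/* Round the hash-table allocation up to a full cache line, as
 * fib6_net_init() now does with max_t().
 */
static size_t fib_hash_alloc_size(size_t nslots, size_t slot_size)
{
	size_t size = nslots * slot_size;

	return size < L1_CACHE_BYTES ? L1_CACHE_BYTES : size;
}
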
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 980912ed7a38..99157b4cd56e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
637 } 637 }
638 mtu -= hlen + sizeof(struct frag_hdr); 638 mtu -= hlen + sizeof(struct frag_hdr);
639 639
640 if (skb_has_frags(skb)) { 640 if (skb_has_frag_list(skb)) {
641 int first_len = skb_pagelen(skb); 641 int first_len = skb_pagelen(skb);
642 struct sk_buff *frag2; 642 struct sk_buff *frag2;
643 643
@@ -878,8 +878,8 @@ static inline int ip6_rt_check(struct rt6key *rt_key,
878 struct in6_addr *fl_addr, 878 struct in6_addr *fl_addr,
879 struct in6_addr *addr_cache) 879 struct in6_addr *addr_cache)
880{ 880{
881 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && 881 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
882 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); 882 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
883} 883}
884 884
885static struct dst_entry *ip6_sk_dst_check(struct sock *sk, 885static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0fd027f3f47e..2a59610c2a58 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -75,7 +75,7 @@ MODULE_LICENSE("GPL");
75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ 75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
76 (HASH_SIZE - 1)) 76 (HASH_SIZE - 1))
77 77
78static void ip6_tnl_dev_init(struct net_device *dev); 78static int ip6_tnl_dev_init(struct net_device *dev);
79static void ip6_tnl_dev_setup(struct net_device *dev); 79static void ip6_tnl_dev_setup(struct net_device *dev);
80 80
81static int ip6_tnl_net_id __read_mostly; 81static int ip6_tnl_net_id __read_mostly;
@@ -83,15 +83,42 @@ struct ip6_tnl_net {
83 /* the IPv6 tunnel fallback device */ 83 /* the IPv6 tunnel fallback device */
84 struct net_device *fb_tnl_dev; 84 struct net_device *fb_tnl_dev;
85 /* lists for storing tunnels in use */ 85 /* lists for storing tunnels in use */
86 struct ip6_tnl *tnls_r_l[HASH_SIZE]; 86 struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
87 struct ip6_tnl *tnls_wc[1]; 87 struct ip6_tnl __rcu *tnls_wc[1];
88 struct ip6_tnl **tnls[2]; 88 struct ip6_tnl __rcu **tnls[2];
89}; 89};
90 90
91/* often modified stats are per cpu, other are shared (netdev->stats) */
92struct pcpu_tstats {
93 unsigned long rx_packets;
94 unsigned long rx_bytes;
95 unsigned long tx_packets;
96 unsigned long tx_bytes;
97};
98
99static struct net_device_stats *ip6_get_stats(struct net_device *dev)
100{
101 struct pcpu_tstats sum = { 0 };
102 int i;
103
104 for_each_possible_cpu(i) {
105 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
106
107 sum.rx_packets += tstats->rx_packets;
108 sum.rx_bytes += tstats->rx_bytes;
109 sum.tx_packets += tstats->tx_packets;
110 sum.tx_bytes += tstats->tx_bytes;
111 }
112 dev->stats.rx_packets = sum.rx_packets;
113 dev->stats.rx_bytes = sum.rx_bytes;
114 dev->stats.tx_packets = sum.tx_packets;
115 dev->stats.tx_bytes = sum.tx_bytes;
116 return &dev->stats;
117}
118
91/* 119/*
 92 * Locking: hash tables are protected by RCU and a spinlock 120 * Locking: hash tables are protected by RCU and RTNL
93 */ 121 */
94static DEFINE_SPINLOCK(ip6_tnl_lock);
95 122
96static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 123static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
97{ 124{
@@ -138,8 +165,8 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
138static struct ip6_tnl * 165static struct ip6_tnl *
139ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) 166ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
140{ 167{
141 unsigned h0 = HASH(remote); 168 unsigned int h0 = HASH(remote);
142 unsigned h1 = HASH(local); 169 unsigned int h1 = HASH(local);
143 struct ip6_tnl *t; 170 struct ip6_tnl *t;
144 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 171 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
145 172
@@ -167,7 +194,7 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
167 * Return: head of IPv6 tunnel list 194 * Return: head of IPv6 tunnel list
168 **/ 195 **/
169 196
170static struct ip6_tnl ** 197static struct ip6_tnl __rcu **
171ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) 198ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
172{ 199{
173 struct in6_addr *remote = &p->raddr; 200 struct in6_addr *remote = &p->raddr;
@@ -190,12 +217,10 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
190static void 217static void
191ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 218ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
192{ 219{
193 struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); 220 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
194 221
 195 spin_lock_bh(&ip6_tnl_lock); 222 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
196 t->next = *tp;
197 rcu_assign_pointer(*tp, t); 223 rcu_assign_pointer(*tp, t);
198 spin_unlock_bh(&ip6_tnl_lock);
199} 224}
200 225
201/** 226/**
@@ -206,18 +231,25 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
206static void 231static void
207ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 232ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
208{ 233{
209 struct ip6_tnl **tp; 234 struct ip6_tnl __rcu **tp;
210 235 struct ip6_tnl *iter;
211 for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { 236
212 if (t == *tp) { 237 for (tp = ip6_tnl_bucket(ip6n, &t->parms);
213 spin_lock_bh(&ip6_tnl_lock); 238 (iter = rtnl_dereference(*tp)) != NULL;
214 *tp = t->next; 239 tp = &iter->next) {
215 spin_unlock_bh(&ip6_tnl_lock); 240 if (t == iter) {
241 rcu_assign_pointer(*tp, t->next);
216 break; 242 break;
217 } 243 }
218 } 244 }
219} 245}
220 246
247static void ip6_dev_free(struct net_device *dev)
248{
249 free_percpu(dev->tstats);
250 free_netdev(dev);
251}
252
221/** 253/**
222 * ip6_tnl_create() - create a new tunnel 254 * ip6_tnl_create() - create a new tunnel
223 * @p: tunnel parameters 255 * @p: tunnel parameters
@@ -256,7 +288,9 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
256 288
257 t = netdev_priv(dev); 289 t = netdev_priv(dev);
258 t->parms = *p; 290 t->parms = *p;
259 ip6_tnl_dev_init(dev); 291 err = ip6_tnl_dev_init(dev);
292 if (err < 0)
293 goto failed_free;
260 294
261 if ((err = register_netdevice(dev)) < 0) 295 if ((err = register_netdevice(dev)) < 0)
262 goto failed_free; 296 goto failed_free;
@@ -266,7 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
266 return t; 300 return t;
267 301
268failed_free: 302failed_free:
269 free_netdev(dev); 303 ip6_dev_free(dev);
270failed: 304failed:
271 return NULL; 305 return NULL;
272} 306}
@@ -290,10 +324,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
290{ 324{
291 struct in6_addr *remote = &p->raddr; 325 struct in6_addr *remote = &p->raddr;
292 struct in6_addr *local = &p->laddr; 326 struct in6_addr *local = &p->laddr;
327 struct ip6_tnl __rcu **tp;
293 struct ip6_tnl *t; 328 struct ip6_tnl *t;
294 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 329 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
295 330
296 for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { 331 for (tp = ip6_tnl_bucket(ip6n, p);
332 (t = rtnl_dereference(*tp)) != NULL;
333 tp = &t->next) {
297 if (ipv6_addr_equal(local, &t->parms.laddr) && 334 if (ipv6_addr_equal(local, &t->parms.laddr) &&
298 ipv6_addr_equal(remote, &t->parms.raddr)) 335 ipv6_addr_equal(remote, &t->parms.raddr))
299 return t; 336 return t;
@@ -318,13 +355,10 @@ ip6_tnl_dev_uninit(struct net_device *dev)
318 struct net *net = dev_net(dev); 355 struct net *net = dev_net(dev);
319 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 356 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
320 357
321 if (dev == ip6n->fb_tnl_dev) { 358 if (dev == ip6n->fb_tnl_dev)
322 spin_lock_bh(&ip6_tnl_lock); 359 rcu_assign_pointer(ip6n->tnls_wc[0], NULL);
323 ip6n->tnls_wc[0] = NULL; 360 else
324 spin_unlock_bh(&ip6_tnl_lock);
325 } else {
326 ip6_tnl_unlink(ip6n, t); 361 ip6_tnl_unlink(ip6n, t);
327 }
328 ip6_tnl_dst_reset(t); 362 ip6_tnl_dst_reset(t);
329 dev_put(dev); 363 dev_put(dev);
330} 364}
@@ -702,6 +736,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
702 736
703 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, 737 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
704 &ipv6h->daddr)) != NULL) { 738 &ipv6h->daddr)) != NULL) {
739 struct pcpu_tstats *tstats;
740
705 if (t->parms.proto != ipproto && t->parms.proto != 0) { 741 if (t->parms.proto != ipproto && t->parms.proto != 0) {
706 rcu_read_unlock(); 742 rcu_read_unlock();
707 goto discard; 743 goto discard;
@@ -724,10 +760,16 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
724 skb->pkt_type = PACKET_HOST; 760 skb->pkt_type = PACKET_HOST;
725 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 761 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
726 762
727 skb_tunnel_rx(skb, t->dev); 763 tstats = this_cpu_ptr(t->dev->tstats);
764 tstats->rx_packets++;
765 tstats->rx_bytes += skb->len;
766
767 __skb_tunnel_rx(skb, t->dev);
728 768
729 dscp_ecn_decapsulate(t, ipv6h, skb); 769 dscp_ecn_decapsulate(t, ipv6h, skb);
770
730 netif_rx(skb); 771 netif_rx(skb);
772
731 rcu_read_unlock(); 773 rcu_read_unlock();
732 return 0; 774 return 0;
733 } 775 }
@@ -934,8 +976,10 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
934 err = ip6_local_out(skb); 976 err = ip6_local_out(skb);
935 977
936 if (net_xmit_eval(err) == 0) { 978 if (net_xmit_eval(err) == 0) {
937 stats->tx_bytes += pkt_len; 979 struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
938 stats->tx_packets++; 980
981 tstats->tx_bytes += pkt_len;
982 tstats->tx_packets++;
939 } else { 983 } else {
940 stats->tx_errors++; 984 stats->tx_errors++;
941 stats->tx_aborted_errors++; 985 stats->tx_aborted_errors++;
@@ -1240,6 +1284,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1240 t = netdev_priv(dev); 1284 t = netdev_priv(dev);
1241 1285
1242 ip6_tnl_unlink(ip6n, t); 1286 ip6_tnl_unlink(ip6n, t);
1287 synchronize_net();
1243 err = ip6_tnl_change(t, &p); 1288 err = ip6_tnl_change(t, &p);
1244 ip6_tnl_link(ip6n, t); 1289 ip6_tnl_link(ip6n, t);
1245 netdev_state_change(dev); 1290 netdev_state_change(dev);
@@ -1300,12 +1345,14 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1300 1345
1301 1346
1302static const struct net_device_ops ip6_tnl_netdev_ops = { 1347static const struct net_device_ops ip6_tnl_netdev_ops = {
1303 .ndo_uninit = ip6_tnl_dev_uninit, 1348 .ndo_uninit = ip6_tnl_dev_uninit,
1304 .ndo_start_xmit = ip6_tnl_xmit, 1349 .ndo_start_xmit = ip6_tnl_xmit,
1305 .ndo_do_ioctl = ip6_tnl_ioctl, 1350 .ndo_do_ioctl = ip6_tnl_ioctl,
1306 .ndo_change_mtu = ip6_tnl_change_mtu, 1351 .ndo_change_mtu = ip6_tnl_change_mtu,
1352 .ndo_get_stats = ip6_get_stats,
1307}; 1353};
1308 1354
1355
1309/** 1356/**
1310 * ip6_tnl_dev_setup - setup virtual tunnel device 1357 * ip6_tnl_dev_setup - setup virtual tunnel device
1311 * @dev: virtual device associated with tunnel 1358 * @dev: virtual device associated with tunnel
@@ -1317,7 +1364,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
1317static void ip6_tnl_dev_setup(struct net_device *dev) 1364static void ip6_tnl_dev_setup(struct net_device *dev)
1318{ 1365{
1319 dev->netdev_ops = &ip6_tnl_netdev_ops; 1366 dev->netdev_ops = &ip6_tnl_netdev_ops;
1320 dev->destructor = free_netdev; 1367 dev->destructor = ip6_dev_free;
1321 1368
1322 dev->type = ARPHRD_TUNNEL6; 1369 dev->type = ARPHRD_TUNNEL6;
1323 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); 1370 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
@@ -1325,6 +1372,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
1325 dev->flags |= IFF_NOARP; 1372 dev->flags |= IFF_NOARP;
1326 dev->addr_len = sizeof(struct in6_addr); 1373 dev->addr_len = sizeof(struct in6_addr);
1327 dev->features |= NETIF_F_NETNS_LOCAL; 1374 dev->features |= NETIF_F_NETNS_LOCAL;
1375 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1328} 1376}
1329 1377
1330 1378
@@ -1333,12 +1381,17 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
1333 * @dev: virtual device associated with tunnel 1381 * @dev: virtual device associated with tunnel
1334 **/ 1382 **/
1335 1383
1336static inline void 1384static inline int
1337ip6_tnl_dev_init_gen(struct net_device *dev) 1385ip6_tnl_dev_init_gen(struct net_device *dev)
1338{ 1386{
1339 struct ip6_tnl *t = netdev_priv(dev); 1387 struct ip6_tnl *t = netdev_priv(dev);
1388
1340 t->dev = dev; 1389 t->dev = dev;
1341 strcpy(t->parms.name, dev->name); 1390 strcpy(t->parms.name, dev->name);
1391 dev->tstats = alloc_percpu(struct pcpu_tstats);
1392 if (!dev->tstats)
1393 return -ENOMEM;
1394 return 0;
1342} 1395}
1343 1396
1344/** 1397/**
@@ -1346,11 +1399,15 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1346 * @dev: virtual device associated with tunnel 1399 * @dev: virtual device associated with tunnel
1347 **/ 1400 **/
1348 1401
1349static void ip6_tnl_dev_init(struct net_device *dev) 1402static int ip6_tnl_dev_init(struct net_device *dev)
1350{ 1403{
1351 struct ip6_tnl *t = netdev_priv(dev); 1404 struct ip6_tnl *t = netdev_priv(dev);
1352 ip6_tnl_dev_init_gen(dev); 1405 int err = ip6_tnl_dev_init_gen(dev);
1406
1407 if (err)
1408 return err;
1353 ip6_tnl_link_config(t); 1409 ip6_tnl_link_config(t);
1410 return 0;
1354} 1411}
1355 1412
1356/** 1413/**
@@ -1360,25 +1417,29 @@ static void ip6_tnl_dev_init(struct net_device *dev)
1360 * Return: 0 1417 * Return: 0
1361 **/ 1418 **/
1362 1419
1363static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) 1420static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1364{ 1421{
1365 struct ip6_tnl *t = netdev_priv(dev); 1422 struct ip6_tnl *t = netdev_priv(dev);
1366 struct net *net = dev_net(dev); 1423 struct net *net = dev_net(dev);
1367 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1424 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1425 int err = ip6_tnl_dev_init_gen(dev);
1426
1427 if (err)
1428 return err;
1368 1429
1369 ip6_tnl_dev_init_gen(dev);
1370 t->parms.proto = IPPROTO_IPV6; 1430 t->parms.proto = IPPROTO_IPV6;
1371 dev_hold(dev); 1431 dev_hold(dev);
1372 ip6n->tnls_wc[0] = t; 1432 rcu_assign_pointer(ip6n->tnls_wc[0], t);
1433 return 0;
1373} 1434}
1374 1435
1375static struct xfrm6_tunnel ip4ip6_handler = { 1436static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
1376 .handler = ip4ip6_rcv, 1437 .handler = ip4ip6_rcv,
1377 .err_handler = ip4ip6_err, 1438 .err_handler = ip4ip6_err,
1378 .priority = 1, 1439 .priority = 1,
1379}; 1440};
1380 1441
1381static struct xfrm6_tunnel ip6ip6_handler = { 1442static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
1382 .handler = ip6ip6_rcv, 1443 .handler = ip6ip6_rcv,
1383 .err_handler = ip6ip6_err, 1444 .err_handler = ip6ip6_err,
1384 .priority = 1, 1445 .priority = 1,
@@ -1391,14 +1452,14 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1391 LIST_HEAD(list); 1452 LIST_HEAD(list);
1392 1453
1393 for (h = 0; h < HASH_SIZE; h++) { 1454 for (h = 0; h < HASH_SIZE; h++) {
1394 t = ip6n->tnls_r_l[h]; 1455 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1395 while (t != NULL) { 1456 while (t != NULL) {
1396 unregister_netdevice_queue(t->dev, &list); 1457 unregister_netdevice_queue(t->dev, &list);
1397 t = t->next; 1458 t = rtnl_dereference(t->next);
1398 } 1459 }
1399 } 1460 }
1400 1461
1401 t = ip6n->tnls_wc[0]; 1462 t = rtnl_dereference(ip6n->tnls_wc[0]);
1402 unregister_netdevice_queue(t->dev, &list); 1463 unregister_netdevice_queue(t->dev, &list);
1403 unregister_netdevice_many(&list); 1464 unregister_netdevice_many(&list);
1404} 1465}
@@ -1419,7 +1480,9 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1419 goto err_alloc_dev; 1480 goto err_alloc_dev;
1420 dev_net_set(ip6n->fb_tnl_dev, net); 1481 dev_net_set(ip6n->fb_tnl_dev, net);
1421 1482
1422 ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); 1483 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1484 if (err < 0)
1485 goto err_register;
1423 1486
1424 err = register_netdev(ip6n->fb_tnl_dev); 1487 err = register_netdev(ip6n->fb_tnl_dev);
1425 if (err < 0) 1488 if (err < 0)
@@ -1427,7 +1490,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1427 return 0; 1490 return 0;
1428 1491
1429err_register: 1492err_register:
1430 free_netdev(ip6n->fb_tnl_dev); 1493 ip6_dev_free(ip6n->fb_tnl_dev);
1431err_alloc_dev: 1494err_alloc_dev:
1432 return err; 1495 return err;
1433} 1496}
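
The ip6_tunnel changes above replace the shared netdev counters on the hot paths with per-cpu pcpu_tstats that each CPU bumps locklessly, folded together only when ip6_get_stats() runs. A userspace analogue of the pattern (a fixed slot array instead of alloc_percpu(); the unlocked 64-bit reads tolerate slightly stale totals, as the kernel version does):

#include <stdint.h>

#define NSLOTS 64	/* illustrative bound, not the kernel's nr_cpu_ids */

struct tstats {
	uint64_t rx_packets;
	uint64_t rx_bytes;
} __attribute__((aligned(64)));		/* one cache line per slot */

static struct tstats percpu[NSLOTS];

/* Hot path: each CPU/thread bumps only its own slot, touching no
 * shared cache line and taking no lock, mirroring
 * this_cpu_ptr(dev->tstats) in ip6_tnl_rcv()/ip6_tnl_xmit2().
 */
static void count_rx(unsigned int slot, uint64_t bytes)
{
	percpu[slot].rx_packets++;
	percpu[slot].rx_bytes += bytes;
}

/* Slow path: fold every slot on demand, as ip6_get_stats() does with
 * for_each_possible_cpu().
 */
static struct tstats sum_stats(void)
{
	struct tstats sum = { 0, 0 };

	for (unsigned int i = 0; i < NSLOTS; i++) {
		sum.rx_packets += percpu[i].rx_packets;
		sum.rx_bytes += percpu[i].rx_bytes;
	}
	return sum;
}
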
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 66078dad7fe8..6f32ffce7022 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -667,6 +667,7 @@ static int pim6_rcv(struct sk_buff *skb)
667 skb_tunnel_rx(skb, reg_dev); 667 skb_tunnel_rx(skb, reg_dev);
668 668
669 netif_rx(skb); 669 netif_rx(skb);
670
670 dev_put(reg_dev); 671 dev_put(reg_dev);
671 return 0; 672 return 0;
672 drop: 673 drop:
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a7f66bc8f0b0..d1770e061c08 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -342,6 +342,25 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
342 retv = 0; 342 retv = 0;
343 break; 343 break;
344 344
345 case IPV6_TRANSPARENT:
346 if (!capable(CAP_NET_ADMIN)) {
347 retv = -EPERM;
348 break;
349 }
350 if (optlen < sizeof(int))
351 goto e_inval;
 352 /* we don't have a separate transparent bit for IPv6; we use the one in the IPv4 socket */
353 inet_sk(sk)->transparent = valbool;
354 retv = 0;
355 break;
356
357 case IPV6_RECVORIGDSTADDR:
358 if (optlen < sizeof(int))
359 goto e_inval;
360 np->rxopt.bits.rxorigdstaddr = valbool;
361 retv = 0;
362 break;
363
345 case IPV6_HOPOPTS: 364 case IPV6_HOPOPTS:
346 case IPV6_RTHDRDSTOPTS: 365 case IPV6_RTHDRDSTOPTS:
347 case IPV6_RTHDR: 366 case IPV6_RTHDR:
@@ -1104,6 +1123,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1104 break; 1123 break;
1105 } 1124 }
1106 1125
1126 case IPV6_TRANSPARENT:
1127 val = inet_sk(sk)->transparent;
1128 break;
1129
1130 case IPV6_RECVORIGDSTADDR:
1131 val = np->rxopt.bits.rxorigdstaddr;
1132 break;
1133
1107 case IPV6_UNICAST_HOPS: 1134 case IPV6_UNICAST_HOPS:
1108 case IPV6_MULTICAST_HOPS: 1135 case IPV6_MULTICAST_HOPS:
1109 { 1136 {
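
Together with the inet6_bind() change earlier, the new IPV6_TRANSPARENT option lets a privileged socket bind to non-local IPv6 addresses, mirroring IP_TRANSPARENT for IPv4. A minimal sketch of the userspace side (requires CAP_NET_ADMIN; the fallback #define covers older headers, the Linux value being 75):

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef IPV6_TRANSPARENT
#define IPV6_TRANSPARENT 75	/* not yet present in older libc headers */
#endif

/* Requires CAP_NET_ADMIN; afterwards the socket may bind() to an
 * address that is not configured on any local interface.
 */
static int make_transparent(int fd)
{
	int on = 1;

	if (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &on, sizeof(on)) < 0) {
		perror("setsockopt(IPV6_TRANSPARENT)");
		return -1;
	}
	return 0;
}
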
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 58841c4ae947..998d6d27e7cf 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -91,7 +91,9 @@
91#include <linux/netfilter.h> 91#include <linux/netfilter.h>
92#include <linux/netfilter_ipv6.h> 92#include <linux/netfilter_ipv6.h>
93 93
94static u32 ndisc_hash(const void *pkey, const struct net_device *dev); 94static u32 ndisc_hash(const void *pkey,
95 const struct net_device *dev,
96 __u32 rnd);
95static int ndisc_constructor(struct neighbour *neigh); 97static int ndisc_constructor(struct neighbour *neigh);
96static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); 98static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
97static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); 99static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -228,12 +230,12 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
228 do { 230 do {
229 cur = ((void *)cur) + (cur->nd_opt_len << 3); 231 cur = ((void *)cur) + (cur->nd_opt_len << 3);
230 } while(cur < end && cur->nd_opt_type != type); 232 } while(cur < end && cur->nd_opt_type != type);
231 return (cur <= end && cur->nd_opt_type == type ? cur : NULL); 233 return cur <= end && cur->nd_opt_type == type ? cur : NULL;
232} 234}
233 235
234static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) 236static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
235{ 237{
236 return (opt->nd_opt_type == ND_OPT_RDNSS); 238 return opt->nd_opt_type == ND_OPT_RDNSS;
237} 239}
238 240
239static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, 241static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
@@ -244,7 +246,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
244 do { 246 do {
245 cur = ((void *)cur) + (cur->nd_opt_len << 3); 247 cur = ((void *)cur) + (cur->nd_opt_len << 3);
246 } while(cur < end && !ndisc_is_useropt(cur)); 248 } while(cur < end && !ndisc_is_useropt(cur));
247 return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); 249 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
248} 250}
249 251
250static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, 252static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
@@ -319,7 +321,7 @@ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
319 int prepad = ndisc_addr_option_pad(dev->type); 321 int prepad = ndisc_addr_option_pad(dev->type);
320 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) 322 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
321 return NULL; 323 return NULL;
322 return (lladdr + prepad); 324 return lladdr + prepad;
323} 325}
324 326
325int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) 327int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
@@ -350,7 +352,9 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
350 352
351EXPORT_SYMBOL(ndisc_mc_map); 353EXPORT_SYMBOL(ndisc_mc_map);
352 354
353static u32 ndisc_hash(const void *pkey, const struct net_device *dev) 355static u32 ndisc_hash(const void *pkey,
356 const struct net_device *dev,
357 __u32 hash_rnd)
354{ 358{
355 const u32 *p32 = pkey; 359 const u32 *p32 = pkey;
356 u32 addr_hash, i; 360 u32 addr_hash, i;
@@ -359,7 +363,7 @@ static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
359 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++) 363 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
360 addr_hash ^= *p32++; 364 addr_hash ^= *p32++;
361 365
362 return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd); 366 return jhash_2words(addr_hash, dev->ifindex, hash_rnd);
363} 367}
364 368
365static int ndisc_constructor(struct neighbour *neigh) 369static int ndisc_constructor(struct neighbour *neigh)
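
Passing hash_rnd into ndisc_hash() above decouples the hash function from the single nd_tbl.hash_rnd field, letting the neighbour core supply whichever seed the (possibly rehashed) table currently uses. The shape of the computation, sketched with a stand-in mixer (hash_mix() below is an arbitrary avalanche function, not the kernel's jhash_2words()):

#include <stdint.h>

/* Stand-in avalanche mixer; the kernel uses jhash_2words() here. */
static uint32_t hash_mix(uint32_t a, uint32_t b, uint32_t seed)
{
	uint32_t h = seed ^ a;

	h = (h ^ (h >> 16)) * 0x45d9f3bu;
	h ^= b;
	h = (h ^ (h >> 16)) * 0x45d9f3bu;
	return h ^ (h >> 16);
}

/* XOR-fold the 128-bit address, then mix it with the interface index
 * and the caller-supplied per-table seed, as the reworked ndisc_hash()
 * does with hash_rnd.
 */
static uint32_t ndisc_hash_like(const uint32_t addr[4], int ifindex,
				uint32_t hash_rnd)
{
	uint32_t addr_hash = addr[0] ^ addr[1] ^ addr[2] ^ addr[3];

	return hash_mix(addr_hash, (uint32_t)ifindex, hash_rnd);
}
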
@@ -1105,6 +1109,18 @@ errout:
1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); 1109 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1106} 1110}
1107 1111
1112static inline int accept_ra(struct inet6_dev *in6_dev)
1113{
1114 /*
 1115 * If forwarding is enabled, RAs are not accepted unless the special
1116 * hybrid mode (accept_ra=2) is enabled.
1117 */
1118 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1119 return 0;
1120
1121 return in6_dev->cnf.accept_ra;
1122}
1123
1108static void ndisc_router_discovery(struct sk_buff *skb) 1124static void ndisc_router_discovery(struct sk_buff *skb)
1109{ 1125{
1110 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); 1126 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1158,8 +1174,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1158 return; 1174 return;
1159 } 1175 }
1160 1176
1161 /* skip route and link configuration on routers */ 1177 if (!accept_ra(in6_dev))
1162 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1163 goto skip_linkparms; 1178 goto skip_linkparms;
1164 1179
1165#ifdef CONFIG_IPV6_NDISC_NODETYPE 1180#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1309,8 +1324,7 @@ skip_linkparms:
1309 NEIGH_UPDATE_F_ISROUTER); 1324 NEIGH_UPDATE_F_ISROUTER);
1310 } 1325 }
1311 1326
1312 /* skip route and link configuration on routers */ 1327 if (!accept_ra(in6_dev))
1313 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1314 goto out; 1328 goto out;
1315 1329
1316#ifdef CONFIG_IPV6_ROUTE_INFO 1330#ifdef CONFIG_IPV6_ROUTE_INFO
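
The accept_ra() helper above encodes the new hybrid mode: a forwarding interface still processes Router Advertisements when accept_ra is 2. A small sketch of flipping the per-interface sysctl from C; the /proc path is the standard one, and the interface name passed in is an illustrative assumption (shell users would simply write the same file):

#include <stdio.h>

/* Keep forwarding enabled on an interface but still accept Router
 * Advertisements, i.e. the accept_ra == 2 case tested in accept_ra().
 */
static int enable_hybrid_ra(const char *ifname)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/proc/sys/net/ipv6/conf/%s/accept_ra", ifname);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs("2", f);
	return fclose(f);
}
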
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 29d643bcafa4..448464844a25 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -5,10 +5,15 @@
5menu "IPv6: Netfilter Configuration" 5menu "IPv6: Netfilter Configuration"
6 depends on INET && IPV6 && NETFILTER 6 depends on INET && IPV6 && NETFILTER
7 7
8config NF_DEFRAG_IPV6
9 tristate
10 default n
11
8config NF_CONNTRACK_IPV6 12config NF_CONNTRACK_IPV6
9 tristate "IPv6 connection tracking support" 13 tristate "IPv6 connection tracking support"
10 depends on INET && IPV6 && NF_CONNTRACK 14 depends on INET && IPV6 && NF_CONNTRACK
11 default m if NETFILTER_ADVANCED=n 15 default m if NETFILTER_ADVANCED=n
16 select NF_DEFRAG_IPV6
12 ---help--- 17 ---help---
13 Connection tracking keeps a record of what packets have passed 18 Connection tracking keeps a record of what packets have passed
14 through your machine, in order to figure out how they are related 19 through your machine, in order to figure out how they are related
@@ -132,10 +137,10 @@ config IP6_NF_MATCH_RT
132# The targets 137# The targets
133config IP6_NF_TARGET_HL 138config IP6_NF_TARGET_HL
134 tristate '"HL" hoplimit target support' 139 tristate '"HL" hoplimit target support'
135 depends on NETFILTER_ADVANCED 140 depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
136 select NETFILTER_XT_TARGET_HL 141 select NETFILTER_XT_TARGET_HL
137 ---help--- 142 ---help---
138 This is a backwards-compat option for the user's convenience 143 This is a backwards-compatible option for the user's convenience
139 (e.g. when running oldconfig). It selects 144 (e.g. when running oldconfig). It selects
140 CONFIG_NETFILTER_XT_TARGET_HL. 145 CONFIG_NETFILTER_XT_TARGET_HL.
141 146
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index aafbba30c899..0a432c9b0795 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,10 +11,14 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
11obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 11obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
12 12
13# objects for l3 independent conntrack 13# objects for l3 independent conntrack
14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o 14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15 15
16# l3 independent conntrack 16# l3 independent conntrack
17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o 17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
18
19# defrag
20nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
21obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
18 22
19# matches 23# matches
20obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 24obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 8e754be92c24..455582384ece 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -82,13 +82,13 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
82int 82int
83ip6t_ext_hdr(u8 nexthdr) 83ip6t_ext_hdr(u8 nexthdr)
84{ 84{
85 return ( (nexthdr == IPPROTO_HOPOPTS) || 85 return (nexthdr == IPPROTO_HOPOPTS) ||
86 (nexthdr == IPPROTO_ROUTING) || 86 (nexthdr == IPPROTO_ROUTING) ||
87 (nexthdr == IPPROTO_FRAGMENT) || 87 (nexthdr == IPPROTO_FRAGMENT) ||
88 (nexthdr == IPPROTO_ESP) || 88 (nexthdr == IPPROTO_ESP) ||
89 (nexthdr == IPPROTO_AH) || 89 (nexthdr == IPPROTO_AH) ||
90 (nexthdr == IPPROTO_NONE) || 90 (nexthdr == IPPROTO_NONE) ||
91 (nexthdr == IPPROTO_DSTOPTS) ); 91 (nexthdr == IPPROTO_DSTOPTS);
92} 92}
93 93
94/* Returns whether matches rule or not. */ 94/* Returns whether matches rule or not. */
@@ -215,7 +215,7 @@ static inline bool unconditional(const struct ip6t_ip6 *ipv6)
215 return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; 215 return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
216} 216}
217 217
218static inline const struct ip6t_entry_target * 218static inline const struct xt_entry_target *
219ip6t_get_target_c(const struct ip6t_entry *e) 219ip6t_get_target_c(const struct ip6t_entry *e)
220{ 220{
221 return ip6t_get_target((struct ip6t_entry *)e); 221 return ip6t_get_target((struct ip6t_entry *)e);
@@ -260,9 +260,9 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
260 const char *hookname, const char **chainname, 260 const char *hookname, const char **chainname,
261 const char **comment, unsigned int *rulenum) 261 const char **comment, unsigned int *rulenum)
262{ 262{
263 const struct ip6t_standard_target *t = (void *)ip6t_get_target_c(s); 263 const struct xt_standard_target *t = (void *)ip6t_get_target_c(s);
264 264
265 if (strcmp(t->target.u.kernel.target->name, IP6T_ERROR_TARGET) == 0) { 265 if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
266 /* Head of user chain: ERROR target with chainname */ 266 /* Head of user chain: ERROR target with chainname */
267 *chainname = t->target.data; 267 *chainname = t->target.data;
268 (*rulenum) = 0; 268 (*rulenum) = 0;
@@ -271,7 +271,7 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
271 271
272 if (s->target_offset == sizeof(struct ip6t_entry) && 272 if (s->target_offset == sizeof(struct ip6t_entry) &&
273 strcmp(t->target.u.kernel.target->name, 273 strcmp(t->target.u.kernel.target->name,
274 IP6T_STANDARD_TARGET) == 0 && 274 XT_STANDARD_TARGET) == 0 &&
275 t->verdict < 0 && 275 t->verdict < 0 &&
276 unconditional(&s->ipv6)) { 276 unconditional(&s->ipv6)) {
277 /* Tail of chains: STANDARD target (return/policy) */ 277 /* Tail of chains: STANDARD target (return/policy) */
@@ -369,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb,
369 e = get_entry(table_base, private->hook_entry[hook]); 369 e = get_entry(table_base, private->hook_entry[hook]);
370 370
371 do { 371 do {
372 const struct ip6t_entry_target *t; 372 const struct xt_entry_target *t;
373 const struct xt_entry_match *ematch; 373 const struct xt_entry_match *ematch;
374 374
375 IP_NF_ASSERT(e); 375 IP_NF_ASSERT(e);
@@ -403,10 +403,10 @@ ip6t_do_table(struct sk_buff *skb,
403 if (!t->u.kernel.target->target) { 403 if (!t->u.kernel.target->target) {
404 int v; 404 int v;
405 405
406 v = ((struct ip6t_standard_target *)t)->verdict; 406 v = ((struct xt_standard_target *)t)->verdict;
407 if (v < 0) { 407 if (v < 0) {
408 /* Pop from stack? */ 408 /* Pop from stack? */
409 if (v != IP6T_RETURN) { 409 if (v != XT_RETURN) {
410 verdict = (unsigned)(-v) - 1; 410 verdict = (unsigned)(-v) - 1;
411 break; 411 break;
412 } 412 }
@@ -434,7 +434,7 @@ ip6t_do_table(struct sk_buff *skb,
434 acpar.targinfo = t->data; 434 acpar.targinfo = t->data;
435 435
436 verdict = t->u.kernel.target->target(skb, &acpar); 436 verdict = t->u.kernel.target->target(skb, &acpar);
437 if (verdict == IP6T_CONTINUE) 437 if (verdict == XT_CONTINUE)
438 e = ip6t_next_entry(e); 438 e = ip6t_next_entry(e);
439 else 439 else
440 /* Verdict */ 440 /* Verdict */
@@ -474,7 +474,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
474 e->counters.pcnt = pos; 474 e->counters.pcnt = pos;
475 475
476 for (;;) { 476 for (;;) {
477 const struct ip6t_standard_target *t 477 const struct xt_standard_target *t
478 = (void *)ip6t_get_target_c(e); 478 = (void *)ip6t_get_target_c(e);
479 int visited = e->comefrom & (1 << hook); 479 int visited = e->comefrom & (1 << hook);
480 480
@@ -488,13 +488,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
488 /* Unconditional return/END. */ 488 /* Unconditional return/END. */
489 if ((e->target_offset == sizeof(struct ip6t_entry) && 489 if ((e->target_offset == sizeof(struct ip6t_entry) &&
490 (strcmp(t->target.u.user.name, 490 (strcmp(t->target.u.user.name,
491 IP6T_STANDARD_TARGET) == 0) && 491 XT_STANDARD_TARGET) == 0) &&
492 t->verdict < 0 && 492 t->verdict < 0 &&
493 unconditional(&e->ipv6)) || visited) { 493 unconditional(&e->ipv6)) || visited) {
494 unsigned int oldpos, size; 494 unsigned int oldpos, size;
495 495
496 if ((strcmp(t->target.u.user.name, 496 if ((strcmp(t->target.u.user.name,
497 IP6T_STANDARD_TARGET) == 0) && 497 XT_STANDARD_TARGET) == 0) &&
498 t->verdict < -NF_MAX_VERDICT - 1) { 498 t->verdict < -NF_MAX_VERDICT - 1) {
499 duprintf("mark_source_chains: bad " 499 duprintf("mark_source_chains: bad "
500 "negative verdict (%i)\n", 500 "negative verdict (%i)\n",
@@ -537,7 +537,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
537 int newpos = t->verdict; 537 int newpos = t->verdict;
538 538
539 if (strcmp(t->target.u.user.name, 539 if (strcmp(t->target.u.user.name,
540 IP6T_STANDARD_TARGET) == 0 && 540 XT_STANDARD_TARGET) == 0 &&
541 newpos >= 0) { 541 newpos >= 0) {
542 if (newpos > newinfo->size - 542 if (newpos > newinfo->size -
543 sizeof(struct ip6t_entry)) { 543 sizeof(struct ip6t_entry)) {
@@ -565,7 +565,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
565 return 1; 565 return 1;
566} 566}
567 567
568static void cleanup_match(struct ip6t_entry_match *m, struct net *net) 568static void cleanup_match(struct xt_entry_match *m, struct net *net)
569{ 569{
570 struct xt_mtdtor_param par; 570 struct xt_mtdtor_param par;
571 571
@@ -581,14 +581,14 @@ static void cleanup_match(struct ip6t_entry_match *m, struct net *net)
581static int 581static int
582check_entry(const struct ip6t_entry *e, const char *name) 582check_entry(const struct ip6t_entry *e, const char *name)
583{ 583{
584 const struct ip6t_entry_target *t; 584 const struct xt_entry_target *t;
585 585
586 if (!ip6_checkentry(&e->ipv6)) { 586 if (!ip6_checkentry(&e->ipv6)) {
587 duprintf("ip_tables: ip check failed %p %s.\n", e, name); 587 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
588 return -EINVAL; 588 return -EINVAL;
589 } 589 }
590 590
591 if (e->target_offset + sizeof(struct ip6t_entry_target) > 591 if (e->target_offset + sizeof(struct xt_entry_target) >
592 e->next_offset) 592 e->next_offset)
593 return -EINVAL; 593 return -EINVAL;
594 594
@@ -599,7 +599,7 @@ check_entry(const struct ip6t_entry *e, const char *name)
599 return 0; 599 return 0;
600} 600}
601 601
602static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) 602static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
603{ 603{
604 const struct ip6t_ip6 *ipv6 = par->entryinfo; 604 const struct ip6t_ip6 *ipv6 = par->entryinfo;
605 int ret; 605 int ret;
@@ -618,7 +618,7 @@ static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par)
618} 618}
619 619
620static int 620static int
621find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) 621find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
622{ 622{
623 struct xt_match *match; 623 struct xt_match *match;
624 int ret; 624 int ret;
@@ -643,7 +643,7 @@ err:
643 643
644static int check_target(struct ip6t_entry *e, struct net *net, const char *name) 644static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
645{ 645{
646 struct ip6t_entry_target *t = ip6t_get_target(e); 646 struct xt_entry_target *t = ip6t_get_target(e);
647 struct xt_tgchk_param par = { 647 struct xt_tgchk_param par = {
648 .net = net, 648 .net = net,
649 .table = name, 649 .table = name,
@@ -670,7 +670,7 @@ static int
670find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, 670find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
671 unsigned int size) 671 unsigned int size)
672{ 672{
673 struct ip6t_entry_target *t; 673 struct xt_entry_target *t;
674 struct xt_target *target; 674 struct xt_target *target;
675 int ret; 675 int ret;
676 unsigned int j; 676 unsigned int j;
@@ -721,7 +721,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
721 721
722static bool check_underflow(const struct ip6t_entry *e) 722static bool check_underflow(const struct ip6t_entry *e)
723{ 723{
724 const struct ip6t_entry_target *t; 724 const struct xt_entry_target *t;
725 unsigned int verdict; 725 unsigned int verdict;
726 726
727 if (!unconditional(&e->ipv6)) 727 if (!unconditional(&e->ipv6))
@@ -729,7 +729,7 @@ static bool check_underflow(const struct ip6t_entry *e)
729 t = ip6t_get_target_c(e); 729 t = ip6t_get_target_c(e);
730 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 730 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
731 return false; 731 return false;
732 verdict = ((struct ip6t_standard_target *)t)->verdict; 732 verdict = ((struct xt_standard_target *)t)->verdict;
733 verdict = -verdict - 1; 733 verdict = -verdict - 1;
734 return verdict == NF_DROP || verdict == NF_ACCEPT; 734 return verdict == NF_DROP || verdict == NF_ACCEPT;
735} 735}
@@ -752,7 +752,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
752 } 752 }
753 753
754 if (e->next_offset 754 if (e->next_offset
755 < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) { 755 < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
756 duprintf("checking: element %p size %u\n", 756 duprintf("checking: element %p size %u\n",
757 e, e->next_offset); 757 e, e->next_offset);
758 return -EINVAL; 758 return -EINVAL;
@@ -784,7 +784,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
784static void cleanup_entry(struct ip6t_entry *e, struct net *net) 784static void cleanup_entry(struct ip6t_entry *e, struct net *net)
785{ 785{
786 struct xt_tgdtor_param par; 786 struct xt_tgdtor_param par;
787 struct ip6t_entry_target *t; 787 struct xt_entry_target *t;
788 struct xt_entry_match *ematch; 788 struct xt_entry_match *ematch;
789 789
790 /* Cleanup all matches */ 790 /* Cleanup all matches */
@@ -985,8 +985,8 @@ copy_entries_to_user(unsigned int total_size,
985 /* ... then go back and fix counters and names */ 985 /* ... then go back and fix counters and names */
986 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 986 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
987 unsigned int i; 987 unsigned int i;
988 const struct ip6t_entry_match *m; 988 const struct xt_entry_match *m;
989 const struct ip6t_entry_target *t; 989 const struct xt_entry_target *t;
990 990
991 e = (struct ip6t_entry *)(loc_cpu_entry + off); 991 e = (struct ip6t_entry *)(loc_cpu_entry + off);
992 if (copy_to_user(userptr + off 992 if (copy_to_user(userptr + off
@@ -1003,7 +1003,7 @@ copy_entries_to_user(unsigned int total_size,
1003 m = (void *)e + i; 1003 m = (void *)e + i;
1004 1004
1005 if (copy_to_user(userptr + off + i 1005 if (copy_to_user(userptr + off + i
1006 + offsetof(struct ip6t_entry_match, 1006 + offsetof(struct xt_entry_match,
1007 u.user.name), 1007 u.user.name),
1008 m->u.kernel.match->name, 1008 m->u.kernel.match->name,
1009 strlen(m->u.kernel.match->name)+1) 1009 strlen(m->u.kernel.match->name)+1)
@@ -1015,7 +1015,7 @@ copy_entries_to_user(unsigned int total_size,
1015 1015
1016 t = ip6t_get_target_c(e); 1016 t = ip6t_get_target_c(e);
1017 if (copy_to_user(userptr + off + e->target_offset 1017 if (copy_to_user(userptr + off + e->target_offset
1018 + offsetof(struct ip6t_entry_target, 1018 + offsetof(struct xt_entry_target,
1019 u.user.name), 1019 u.user.name),
1020 t->u.kernel.target->name, 1020 t->u.kernel.target->name,
1021 strlen(t->u.kernel.target->name)+1) != 0) { 1021 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1053,7 +1053,7 @@ static int compat_calc_entry(const struct ip6t_entry *e,
1053 const void *base, struct xt_table_info *newinfo) 1053 const void *base, struct xt_table_info *newinfo)
1054{ 1054{
1055 const struct xt_entry_match *ematch; 1055 const struct xt_entry_match *ematch;
1056 const struct ip6t_entry_target *t; 1056 const struct xt_entry_target *t;
1057 unsigned int entry_offset; 1057 unsigned int entry_offset;
1058 int off, i, ret; 1058 int off, i, ret;
1059 1059
@@ -1105,7 +1105,7 @@ static int compat_table_info(const struct xt_table_info *info,
1105static int get_info(struct net *net, void __user *user, 1105static int get_info(struct net *net, void __user *user,
1106 const int *len, int compat) 1106 const int *len, int compat)
1107{ 1107{
1108 char name[IP6T_TABLE_MAXNAMELEN]; 1108 char name[XT_TABLE_MAXNAMELEN];
1109 struct xt_table *t; 1109 struct xt_table *t;
1110 int ret; 1110 int ret;
1111 1111
@@ -1118,7 +1118,7 @@ static int get_info(struct net *net, void __user *user,
1118 if (copy_from_user(name, user, sizeof(name)) != 0) 1118 if (copy_from_user(name, user, sizeof(name)) != 0)
1119 return -EFAULT; 1119 return -EFAULT;
1120 1120
1121 name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; 1121 name[XT_TABLE_MAXNAMELEN-1] = '\0';
1122#ifdef CONFIG_COMPAT 1122#ifdef CONFIG_COMPAT
1123 if (compat) 1123 if (compat)
1124 xt_compat_lock(AF_INET6); 1124 xt_compat_lock(AF_INET6);
@@ -1137,6 +1137,7 @@ static int get_info(struct net *net, void __user *user,
1137 private = &tmp; 1137 private = &tmp;
1138 } 1138 }
1139#endif 1139#endif
1140 memset(&info, 0, sizeof(info));
1140 info.valid_hooks = t->valid_hooks; 1141 info.valid_hooks = t->valid_hooks;
1141 memcpy(info.hook_entry, private->hook_entry, 1142 memcpy(info.hook_entry, private->hook_entry,
1142 sizeof(info.hook_entry)); 1143 sizeof(info.hook_entry));
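
The memset() added in the hunk above zeroes the whole info structure before its fields are filled in and copied back to user space, closing a kernel stack infoleak through struct padding or any member the code leaves unset. A small userspace sketch of the idea, assuming a hypothetical struct (the real one is struct ip6t_getinfo):

/* Why get_info() gains a memset(): a struct copied to user space
 * field by field can still leak stale stack bytes through padding
 * and through any member that is never assigned. Zeroing the whole
 * object first makes the copy safe by construction.
 */
#include <stdio.h>
#include <string.h>

struct getinfo {			/* hypothetical stand-in */
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_entries;
	unsigned int size;		/* padding/uninit bytes would hide here */
};

static void fill(struct getinfo *info)
{
	memset(info, 0, sizeof(*info));	/* the fix: no stale bytes remain */
	info->valid_hooks = 0x1f;
	info->num_entries = 4;
	/* ...remaining fields copied from table state... */
}

int main(void)
{
	struct getinfo info;

	fill(&info);
	printf("valid_hooks=0x%x\n", info.valid_hooks);
	return 0;
}
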
@@ -1415,14 +1416,14 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1415 1416
1416#ifdef CONFIG_COMPAT 1417#ifdef CONFIG_COMPAT
1417struct compat_ip6t_replace { 1418struct compat_ip6t_replace {
1418 char name[IP6T_TABLE_MAXNAMELEN]; 1419 char name[XT_TABLE_MAXNAMELEN];
1419 u32 valid_hooks; 1420 u32 valid_hooks;
1420 u32 num_entries; 1421 u32 num_entries;
1421 u32 size; 1422 u32 size;
1422 u32 hook_entry[NF_INET_NUMHOOKS]; 1423 u32 hook_entry[NF_INET_NUMHOOKS];
1423 u32 underflow[NF_INET_NUMHOOKS]; 1424 u32 underflow[NF_INET_NUMHOOKS];
1424 u32 num_counters; 1425 u32 num_counters;
1425 compat_uptr_t counters; /* struct ip6t_counters * */ 1426 compat_uptr_t counters; /* struct xt_counters * */
1426 struct compat_ip6t_entry entries[0]; 1427 struct compat_ip6t_entry entries[0];
1427}; 1428};
1428 1429
@@ -1431,7 +1432,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1431 unsigned int *size, struct xt_counters *counters, 1432 unsigned int *size, struct xt_counters *counters,
1432 unsigned int i) 1433 unsigned int i)
1433{ 1434{
1434 struct ip6t_entry_target *t; 1435 struct xt_entry_target *t;
1435 struct compat_ip6t_entry __user *ce; 1436 struct compat_ip6t_entry __user *ce;
1436 u_int16_t target_offset, next_offset; 1437 u_int16_t target_offset, next_offset;
1437 compat_uint_t origsize; 1438 compat_uint_t origsize;
@@ -1466,7 +1467,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1466} 1467}
1467 1468
1468static int 1469static int
1469compat_find_calc_match(struct ip6t_entry_match *m, 1470compat_find_calc_match(struct xt_entry_match *m,
1470 const char *name, 1471 const char *name,
1471 const struct ip6t_ip6 *ipv6, 1472 const struct ip6t_ip6 *ipv6,
1472 unsigned int hookmask, 1473 unsigned int hookmask,
@@ -1488,7 +1489,7 @@ compat_find_calc_match(struct ip6t_entry_match *m,
1488 1489
1489static void compat_release_entry(struct compat_ip6t_entry *e) 1490static void compat_release_entry(struct compat_ip6t_entry *e)
1490{ 1491{
1491 struct ip6t_entry_target *t; 1492 struct xt_entry_target *t;
1492 struct xt_entry_match *ematch; 1493 struct xt_entry_match *ematch;
1493 1494
1494 /* Cleanup all matches */ 1495 /* Cleanup all matches */
@@ -1509,7 +1510,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1509 const char *name) 1510 const char *name)
1510{ 1511{
1511 struct xt_entry_match *ematch; 1512 struct xt_entry_match *ematch;
1512 struct ip6t_entry_target *t; 1513 struct xt_entry_target *t;
1513 struct xt_target *target; 1514 struct xt_target *target;
1514 unsigned int entry_offset; 1515 unsigned int entry_offset;
1515 unsigned int j; 1516 unsigned int j;
@@ -1591,7 +1592,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
1591 unsigned int *size, const char *name, 1592 unsigned int *size, const char *name,
1592 struct xt_table_info *newinfo, unsigned char *base) 1593 struct xt_table_info *newinfo, unsigned char *base)
1593{ 1594{
1594 struct ip6t_entry_target *t; 1595 struct xt_entry_target *t;
1595 struct xt_target *target; 1596 struct xt_target *target;
1596 struct ip6t_entry *de; 1597 struct ip6t_entry *de;
1597 unsigned int origsize; 1598 unsigned int origsize;
@@ -1899,7 +1900,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1899} 1900}
1900 1901
1901struct compat_ip6t_get_entries { 1902struct compat_ip6t_get_entries {
1902 char name[IP6T_TABLE_MAXNAMELEN]; 1903 char name[XT_TABLE_MAXNAMELEN];
1903 compat_uint_t size; 1904 compat_uint_t size;
1904 struct compat_ip6t_entry entrytable[0]; 1905 struct compat_ip6t_entry entrytable[0];
1905}; 1906};
@@ -2054,7 +2055,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2054 2055
2055 case IP6T_SO_GET_REVISION_MATCH: 2056 case IP6T_SO_GET_REVISION_MATCH:
2056 case IP6T_SO_GET_REVISION_TARGET: { 2057 case IP6T_SO_GET_REVISION_TARGET: {
2057 struct ip6t_get_revision rev; 2058 struct xt_get_revision rev;
2058 int target; 2059 int target;
2059 2060
2060 if (*len != sizeof(rev)) { 2061 if (*len != sizeof(rev)) {
@@ -2191,7 +2192,7 @@ static int icmp6_checkentry(const struct xt_mtchk_param *par)
2191/* The built-in targets: standard (NULL) and error. */ 2192/* The built-in targets: standard (NULL) and error. */
2192static struct xt_target ip6t_builtin_tg[] __read_mostly = { 2193static struct xt_target ip6t_builtin_tg[] __read_mostly = {
2193 { 2194 {
2194 .name = IP6T_STANDARD_TARGET, 2195 .name = XT_STANDARD_TARGET,
2195 .targetsize = sizeof(int), 2196 .targetsize = sizeof(int),
2196 .family = NFPROTO_IPV6, 2197 .family = NFPROTO_IPV6,
2197#ifdef CONFIG_COMPAT 2198#ifdef CONFIG_COMPAT
@@ -2201,9 +2202,9 @@ static struct xt_target ip6t_builtin_tg[] __read_mostly = {
2201#endif 2202#endif
2202 }, 2203 },
2203 { 2204 {
2204 .name = IP6T_ERROR_TARGET, 2205 .name = XT_ERROR_TARGET,
2205 .target = ip6t_error, 2206 .target = ip6t_error,
2206 .targetsize = IP6T_FUNCTION_MAXNAMELEN, 2207 .targetsize = XT_FUNCTION_MAXNAMELEN,
2207 .family = NFPROTO_IPV6, 2208 .family = NFPROTO_IPV6,
2208 }, 2209 },
2209}; 2210};
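
The rename above is mechanical: the ip6t_-prefixed aliases (IP6T_RETURN, IP6T_CONTINUE, IP6T_STANDARD_TARGET, struct ip6t_standard_target, IP6T_TABLE_MAXNAMELEN, and so on) become the shared xt_ spellings with identical values, so ip_tables, ip6_tables and arp_tables use one set of definitions. The decode seen in ip6t_do_table(), (unsigned)(-v) - 1, relies on standard targets storing netfilter verdicts as negative numbers so that non-negative values can serve as jump offsets. A minimal standalone sketch of that encoding; the constants mirror the kernel's, but this is an illustration, not the kernel code itself:

#include <stdio.h>

#define NF_DROP   0
#define NF_ACCEPT 1

/* A negative verdict v stores the netfilter verdict as -(verdict + 1):
 * NF_DROP (0) becomes -1, NF_ACCEPT (1) becomes -2, and so on.
 * Non-negative values are byte offsets of the next rule to jump to.
 */
static int encode_verdict(unsigned int nf_verdict)
{
	return -(int)nf_verdict - 1;
}

static unsigned int decode_verdict(int v)
{
	return (unsigned int)(-v) - 1;	/* same expression as ip6t_do_table() */
}

int main(void)
{
	int v = encode_verdict(NF_ACCEPT);

	printf("stored=%d decoded=%u (NF_ACCEPT=%u)\n",
	       v, decode_verdict(v), NF_ACCEPT);
	return 0;
}
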
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0a07ae7b933f..09c88891a753 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -23,6 +23,7 @@
23#include <linux/netfilter/x_tables.h> 23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter_ipv6/ip6_tables.h> 24#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/nf_log.h> 25#include <net/netfilter/nf_log.h>
26#include <net/netfilter/xt_log.h>
26 27
27MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); 28MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
28MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog"); 29MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
@@ -32,11 +33,9 @@ struct in_device;
32#include <net/route.h> 33#include <net/route.h>
33#include <linux/netfilter_ipv6/ip6t_LOG.h> 34#include <linux/netfilter_ipv6/ip6t_LOG.h>
34 35
35/* Use lock to serialize, so printks don't overlap */
36static DEFINE_SPINLOCK(log_lock);
37
38/* One level of recursion won't kill us */ 36/* One level of recursion won't kill us */
39static void dump_packet(const struct nf_loginfo *info, 37static void dump_packet(struct sbuff *m,
38 const struct nf_loginfo *info,
40 const struct sk_buff *skb, unsigned int ip6hoff, 39 const struct sk_buff *skb, unsigned int ip6hoff,
41 int recurse) 40 int recurse)
42{ 41{
@@ -55,15 +54,15 @@ static void dump_packet(const struct nf_loginfo *info,
55 54
56 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); 55 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
57 if (ih == NULL) { 56 if (ih == NULL) {
58 printk("TRUNCATED"); 57 sb_add(m, "TRUNCATED");
59 return; 58 return;
60 } 59 }
61 60
62 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ 61 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
63 printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); 62 sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
64 63
65 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ 64 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
66 printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", 65 sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
67 ntohs(ih->payload_len) + sizeof(struct ipv6hdr), 66 ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
68 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, 67 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
69 ih->hop_limit, 68 ih->hop_limit,
@@ -78,35 +77,35 @@ static void dump_packet(const struct nf_loginfo *info,
78 77
79 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); 78 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
80 if (hp == NULL) { 79 if (hp == NULL) {
81 printk("TRUNCATED"); 80 sb_add(m, "TRUNCATED");
82 return; 81 return;
83 } 82 }
84 83
85 /* Max length: 48 "OPT (...) " */ 84 /* Max length: 48 "OPT (...) " */
86 if (logflags & IP6T_LOG_IPOPT) 85 if (logflags & IP6T_LOG_IPOPT)
87 printk("OPT ( "); 86 sb_add(m, "OPT ( ");
88 87
89 switch (currenthdr) { 88 switch (currenthdr) {
90 case IPPROTO_FRAGMENT: { 89 case IPPROTO_FRAGMENT: {
91 struct frag_hdr _fhdr; 90 struct frag_hdr _fhdr;
92 const struct frag_hdr *fh; 91 const struct frag_hdr *fh;
93 92
94 printk("FRAG:"); 93 sb_add(m, "FRAG:");
95 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), 94 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
96 &_fhdr); 95 &_fhdr);
97 if (fh == NULL) { 96 if (fh == NULL) {
98 printk("TRUNCATED "); 97 sb_add(m, "TRUNCATED ");
99 return; 98 return;
100 } 99 }
101 100
102 /* Max length: 6 "65535 " */ 101 /* Max length: 6 "65535 " */
103 printk("%u ", ntohs(fh->frag_off) & 0xFFF8); 102 sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
104 103
105 /* Max length: 11 "INCOMPLETE " */ 104 /* Max length: 11 "INCOMPLETE " */
106 if (fh->frag_off & htons(0x0001)) 105 if (fh->frag_off & htons(0x0001))
107 printk("INCOMPLETE "); 106 sb_add(m, "INCOMPLETE ");
108 107
109 printk("ID:%08x ", ntohl(fh->identification)); 108 sb_add(m, "ID:%08x ", ntohl(fh->identification));
110 109
111 if (ntohs(fh->frag_off) & 0xFFF8) 110 if (ntohs(fh->frag_off) & 0xFFF8)
112 fragment = 1; 111 fragment = 1;
@@ -120,7 +119,7 @@ static void dump_packet(const struct nf_loginfo *info,
120 case IPPROTO_HOPOPTS: 119 case IPPROTO_HOPOPTS:
121 if (fragment) { 120 if (fragment) {
122 if (logflags & IP6T_LOG_IPOPT) 121 if (logflags & IP6T_LOG_IPOPT)
123 printk(")"); 122 sb_add(m, ")");
124 return; 123 return;
125 } 124 }
126 hdrlen = ipv6_optlen(hp); 125 hdrlen = ipv6_optlen(hp);
@@ -132,10 +131,10 @@ static void dump_packet(const struct nf_loginfo *info,
132 const struct ip_auth_hdr *ah; 131 const struct ip_auth_hdr *ah;
133 132
134 /* Max length: 3 "AH " */ 133 /* Max length: 3 "AH " */
135 printk("AH "); 134 sb_add(m, "AH ");
136 135
137 if (fragment) { 136 if (fragment) {
138 printk(")"); 137 sb_add(m, ")");
139 return; 138 return;
140 } 139 }
141 140
@@ -146,13 +145,13 @@ static void dump_packet(const struct nf_loginfo *info,
146 * Max length: 26 "INCOMPLETE [65535 145 * Max length: 26 "INCOMPLETE [65535
147 * bytes] )" 146 * bytes] )"
148 */ 147 */
149 printk("INCOMPLETE [%u bytes] )", 148 sb_add(m, "INCOMPLETE [%u bytes] )",
150 skb->len - ptr); 149 skb->len - ptr);
151 return; 150 return;
152 } 151 }
153 152
154 /* Length: 15 "SPI=0xF1234567 */ 153 /* Length: 15 "SPI=0xF1234567 */
155 printk("SPI=0x%x ", ntohl(ah->spi)); 154 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
156 155
157 } 156 }
158 157
@@ -164,10 +163,10 @@ static void dump_packet(const struct nf_loginfo *info,
164 const struct ip_esp_hdr *eh; 163 const struct ip_esp_hdr *eh;
165 164
166 /* Max length: 4 "ESP " */ 165 /* Max length: 4 "ESP " */
167 printk("ESP "); 166 sb_add(m, "ESP ");
168 167
169 if (fragment) { 168 if (fragment) {
170 printk(")"); 169 sb_add(m, ")");
171 return; 170 return;
172 } 171 }
173 172
@@ -177,23 +176,23 @@ static void dump_packet(const struct nf_loginfo *info,
177 eh = skb_header_pointer(skb, ptr, sizeof(_esph), 176 eh = skb_header_pointer(skb, ptr, sizeof(_esph),
178 &_esph); 177 &_esph);
179 if (eh == NULL) { 178 if (eh == NULL) {
180 printk("INCOMPLETE [%u bytes] )", 179 sb_add(m, "INCOMPLETE [%u bytes] )",
181 skb->len - ptr); 180 skb->len - ptr);
182 return; 181 return;
183 } 182 }
184 183
185 /* Length: 16 "SPI=0xF1234567 )" */ 184 /* Length: 16 "SPI=0xF1234567 )" */
186 printk("SPI=0x%x )", ntohl(eh->spi) ); 185 sb_add(m, "SPI=0x%x )", ntohl(eh->spi) );
187 186
188 } 187 }
189 return; 188 return;
190 default: 189 default:
191 /* Max length: 20 "Unknown Ext Hdr 255" */ 190 /* Max length: 20 "Unknown Ext Hdr 255" */
192 printk("Unknown Ext Hdr %u", currenthdr); 191 sb_add(m, "Unknown Ext Hdr %u", currenthdr);
193 return; 192 return;
194 } 193 }
195 if (logflags & IP6T_LOG_IPOPT) 194 if (logflags & IP6T_LOG_IPOPT)
196 printk(") "); 195 sb_add(m, ") ");
197 196
198 currenthdr = hp->nexthdr; 197 currenthdr = hp->nexthdr;
199 ptr += hdrlen; 198 ptr += hdrlen;
@@ -205,7 +204,7 @@ static void dump_packet(const struct nf_loginfo *info,
205 const struct tcphdr *th; 204 const struct tcphdr *th;
206 205
207 /* Max length: 10 "PROTO=TCP " */ 206 /* Max length: 10 "PROTO=TCP " */
208 printk("PROTO=TCP "); 207 sb_add(m, "PROTO=TCP ");
209 208
210 if (fragment) 209 if (fragment)
211 break; 210 break;
@@ -213,40 +212,40 @@ static void dump_packet(const struct nf_loginfo *info,
213 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 212 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
214 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph); 213 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
215 if (th == NULL) { 214 if (th == NULL) {
216 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 215 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
217 return; 216 return;
218 } 217 }
219 218
220 /* Max length: 20 "SPT=65535 DPT=65535 " */ 219 /* Max length: 20 "SPT=65535 DPT=65535 " */
221 printk("SPT=%u DPT=%u ", 220 sb_add(m, "SPT=%u DPT=%u ",
222 ntohs(th->source), ntohs(th->dest)); 221 ntohs(th->source), ntohs(th->dest));
223 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ 222 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
224 if (logflags & IP6T_LOG_TCPSEQ) 223 if (logflags & IP6T_LOG_TCPSEQ)
225 printk("SEQ=%u ACK=%u ", 224 sb_add(m, "SEQ=%u ACK=%u ",
226 ntohl(th->seq), ntohl(th->ack_seq)); 225 ntohl(th->seq), ntohl(th->ack_seq));
227 /* Max length: 13 "WINDOW=65535 " */ 226 /* Max length: 13 "WINDOW=65535 " */
228 printk("WINDOW=%u ", ntohs(th->window)); 227 sb_add(m, "WINDOW=%u ", ntohs(th->window));
229 /* Max length: 9 "RES=0x3C " */ 228 /* Max length: 9 "RES=0x3C " */
230 printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); 229 sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
231 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ 230 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
232 if (th->cwr) 231 if (th->cwr)
233 printk("CWR "); 232 sb_add(m, "CWR ");
234 if (th->ece) 233 if (th->ece)
235 printk("ECE "); 234 sb_add(m, "ECE ");
236 if (th->urg) 235 if (th->urg)
237 printk("URG "); 236 sb_add(m, "URG ");
238 if (th->ack) 237 if (th->ack)
239 printk("ACK "); 238 sb_add(m, "ACK ");
240 if (th->psh) 239 if (th->psh)
241 printk("PSH "); 240 sb_add(m, "PSH ");
242 if (th->rst) 241 if (th->rst)
243 printk("RST "); 242 sb_add(m, "RST ");
244 if (th->syn) 243 if (th->syn)
245 printk("SYN "); 244 sb_add(m, "SYN ");
246 if (th->fin) 245 if (th->fin)
247 printk("FIN "); 246 sb_add(m, "FIN ");
248 /* Max length: 11 "URGP=65535 " */ 247 /* Max length: 11 "URGP=65535 " */
249 printk("URGP=%u ", ntohs(th->urg_ptr)); 248 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
250 249
251 if ((logflags & IP6T_LOG_TCPOPT) && 250 if ((logflags & IP6T_LOG_TCPOPT) &&
252 th->doff * 4 > sizeof(struct tcphdr)) { 251 th->doff * 4 > sizeof(struct tcphdr)) {
@@ -260,15 +259,15 @@ static void dump_packet(const struct nf_loginfo *info,
260 ptr + sizeof(struct tcphdr), 259 ptr + sizeof(struct tcphdr),
261 optsize, _opt); 260 optsize, _opt);
262 if (op == NULL) { 261 if (op == NULL) {
263 printk("OPT (TRUNCATED)"); 262 sb_add(m, "OPT (TRUNCATED)");
264 return; 263 return;
265 } 264 }
266 265
267 /* Max length: 127 "OPT (" 15*4*2chars ") " */ 266 /* Max length: 127 "OPT (" 15*4*2chars ") " */
268 printk("OPT ("); 267 sb_add(m, "OPT (");
269 for (i =0; i < optsize; i++) 268 for (i =0; i < optsize; i++)
270 printk("%02X", op[i]); 269 sb_add(m, "%02X", op[i]);
271 printk(") "); 270 sb_add(m, ") ");
272 } 271 }
273 break; 272 break;
274 } 273 }
@@ -279,9 +278,9 @@ static void dump_packet(const struct nf_loginfo *info,
279 278
280 if (currenthdr == IPPROTO_UDP) 279 if (currenthdr == IPPROTO_UDP)
281 /* Max length: 10 "PROTO=UDP " */ 280 /* Max length: 10 "PROTO=UDP " */
282 printk("PROTO=UDP " ); 281 sb_add(m, "PROTO=UDP " );
283 else /* Max length: 14 "PROTO=UDPLITE " */ 282 else /* Max length: 14 "PROTO=UDPLITE " */
284 printk("PROTO=UDPLITE "); 283 sb_add(m, "PROTO=UDPLITE ");
285 284
286 if (fragment) 285 if (fragment)
287 break; 286 break;
@@ -289,12 +288,12 @@ static void dump_packet(const struct nf_loginfo *info,
289 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 288 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
290 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph); 289 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
291 if (uh == NULL) { 290 if (uh == NULL) {
292 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 291 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
293 return; 292 return;
294 } 293 }
295 294
296 /* Max length: 20 "SPT=65535 DPT=65535 " */ 295 /* Max length: 20 "SPT=65535 DPT=65535 " */
297 printk("SPT=%u DPT=%u LEN=%u ", 296 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
298 ntohs(uh->source), ntohs(uh->dest), 297 ntohs(uh->source), ntohs(uh->dest),
299 ntohs(uh->len)); 298 ntohs(uh->len));
300 break; 299 break;
@@ -304,7 +303,7 @@ static void dump_packet(const struct nf_loginfo *info,
304 const struct icmp6hdr *ic; 303 const struct icmp6hdr *ic;
305 304
306 /* Max length: 13 "PROTO=ICMPv6 " */ 305 /* Max length: 13 "PROTO=ICMPv6 " */
307 printk("PROTO=ICMPv6 "); 306 sb_add(m, "PROTO=ICMPv6 ");
308 307
309 if (fragment) 308 if (fragment)
310 break; 309 break;
@@ -312,18 +311,18 @@ static void dump_packet(const struct nf_loginfo *info,
312 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 311 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
313 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); 312 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
314 if (ic == NULL) { 313 if (ic == NULL) {
315 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 314 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
316 return; 315 return;
317 } 316 }
318 317
319 /* Max length: 18 "TYPE=255 CODE=255 " */ 318 /* Max length: 18 "TYPE=255 CODE=255 " */
320 printk("TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); 319 sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
321 320
322 switch (ic->icmp6_type) { 321 switch (ic->icmp6_type) {
323 case ICMPV6_ECHO_REQUEST: 322 case ICMPV6_ECHO_REQUEST:
324 case ICMPV6_ECHO_REPLY: 323 case ICMPV6_ECHO_REPLY:
325 /* Max length: 19 "ID=65535 SEQ=65535 " */ 324 /* Max length: 19 "ID=65535 SEQ=65535 " */
326 printk("ID=%u SEQ=%u ", 325 sb_add(m, "ID=%u SEQ=%u ",
327 ntohs(ic->icmp6_identifier), 326 ntohs(ic->icmp6_identifier),
328 ntohs(ic->icmp6_sequence)); 327 ntohs(ic->icmp6_sequence));
329 break; 328 break;
@@ -334,35 +333,35 @@ static void dump_packet(const struct nf_loginfo *info,
334 333
335 case ICMPV6_PARAMPROB: 334 case ICMPV6_PARAMPROB:
336 /* Max length: 17 "POINTER=ffffffff " */ 335 /* Max length: 17 "POINTER=ffffffff " */
337 printk("POINTER=%08x ", ntohl(ic->icmp6_pointer)); 336 sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
338 /* Fall through */ 337 /* Fall through */
339 case ICMPV6_DEST_UNREACH: 338 case ICMPV6_DEST_UNREACH:
340 case ICMPV6_PKT_TOOBIG: 339 case ICMPV6_PKT_TOOBIG:
341 case ICMPV6_TIME_EXCEED: 340 case ICMPV6_TIME_EXCEED:
342 /* Max length: 3+maxlen */ 341 /* Max length: 3+maxlen */
343 if (recurse) { 342 if (recurse) {
344 printk("["); 343 sb_add(m, "[");
345 dump_packet(info, skb, ptr + sizeof(_icmp6h), 344 dump_packet(m, info, skb,
346 0); 345 ptr + sizeof(_icmp6h), 0);
347 printk("] "); 346 sb_add(m, "] ");
348 } 347 }
349 348
350 /* Max length: 10 "MTU=65535 " */ 349 /* Max length: 10 "MTU=65535 " */
351 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) 350 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
352 printk("MTU=%u ", ntohl(ic->icmp6_mtu)); 351 sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
353 } 352 }
354 break; 353 break;
355 } 354 }
356 /* Max length: 10 "PROTO=255 " */ 355 /* Max length: 10 "PROTO=255 " */
357 default: 356 default:
358 printk("PROTO=%u ", currenthdr); 357 sb_add(m, "PROTO=%u ", currenthdr);
359 } 358 }
360 359
361 /* Max length: 15 "UID=4294967295 " */ 360 /* Max length: 15 "UID=4294967295 " */
362 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { 361 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
363 read_lock_bh(&skb->sk->sk_callback_lock); 362 read_lock_bh(&skb->sk->sk_callback_lock);
364 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 363 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
365 printk("UID=%u GID=%u ", 364 sb_add(m, "UID=%u GID=%u ",
366 skb->sk->sk_socket->file->f_cred->fsuid, 365 skb->sk->sk_socket->file->f_cred->fsuid,
367 skb->sk->sk_socket->file->f_cred->fsgid); 366 skb->sk->sk_socket->file->f_cred->fsgid);
368 read_unlock_bh(&skb->sk->sk_callback_lock); 367 read_unlock_bh(&skb->sk->sk_callback_lock);
@@ -370,10 +369,11 @@ static void dump_packet(const struct nf_loginfo *info,
370 369
371 /* Max length: 16 "MARK=0xFFFFFFFF " */ 370 /* Max length: 16 "MARK=0xFFFFFFFF " */
372 if (!recurse && skb->mark) 371 if (!recurse && skb->mark)
373 printk("MARK=0x%x ", skb->mark); 372 sb_add(m, "MARK=0x%x ", skb->mark);
374} 373}
375 374
376static void dump_mac_header(const struct nf_loginfo *info, 375static void dump_mac_header(struct sbuff *m,
376 const struct nf_loginfo *info,
377 const struct sk_buff *skb) 377 const struct sk_buff *skb)
378{ 378{
379 struct net_device *dev = skb->dev; 379 struct net_device *dev = skb->dev;
@@ -387,7 +387,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
387 387
388 switch (dev->type) { 388 switch (dev->type) {
389 case ARPHRD_ETHER: 389 case ARPHRD_ETHER:
390 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", 390 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, 391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
392 ntohs(eth_hdr(skb)->h_proto)); 392 ntohs(eth_hdr(skb)->h_proto));
393 return; 393 return;
@@ -396,7 +396,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
396 } 396 }
397 397
398fallback: 398fallback:
399 printk("MAC="); 399 sb_add(m, "MAC=");
400 if (dev->hard_header_len && 400 if (dev->hard_header_len &&
401 skb->mac_header != skb->network_header) { 401 skb->mac_header != skb->network_header) {
402 const unsigned char *p = skb_mac_header(skb); 402 const unsigned char *p = skb_mac_header(skb);
@@ -408,19 +408,19 @@ fallback:
408 p = NULL; 408 p = NULL;
409 409
410 if (p != NULL) { 410 if (p != NULL) {
411 printk("%02x", *p++); 411 sb_add(m, "%02x", *p++);
412 for (i = 1; i < len; i++) 412 for (i = 1; i < len; i++)
413 printk(":%02x", p[i]); 413 sb_add(m, ":%02x", p[i]);
414 } 414 }
415 printk(" "); 415 sb_add(m, " ");
416 416
417 if (dev->type == ARPHRD_SIT) { 417 if (dev->type == ARPHRD_SIT) {
418 const struct iphdr *iph = 418 const struct iphdr *iph =
419 (struct iphdr *)skb_mac_header(skb); 419 (struct iphdr *)skb_mac_header(skb);
420 printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); 420 sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
421 } 421 }
422 } else 422 } else
423 printk(" "); 423 sb_add(m, " ");
424} 424}
425 425
426static struct nf_loginfo default_loginfo = { 426static struct nf_loginfo default_loginfo = {
@@ -442,22 +442,23 @@ ip6t_log_packet(u_int8_t pf,
442 const struct nf_loginfo *loginfo, 442 const struct nf_loginfo *loginfo,
443 const char *prefix) 443 const char *prefix)
444{ 444{
445 struct sbuff *m = sb_open();
446
445 if (!loginfo) 447 if (!loginfo)
446 loginfo = &default_loginfo; 448 loginfo = &default_loginfo;
447 449
448 spin_lock_bh(&log_lock); 450 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
449 printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, 451 prefix,
450 prefix, 452 in ? in->name : "",
451 in ? in->name : "", 453 out ? out->name : "");
452 out ? out->name : "");
453 454
454 /* MAC logging for input path only. */ 455 /* MAC logging for input path only. */
455 if (in && !out) 456 if (in && !out)
456 dump_mac_header(loginfo, skb); 457 dump_mac_header(m, loginfo, skb);
458
459 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
457 460
458 dump_packet(loginfo, skb, skb_network_offset(skb), 1); 461 sb_close(m);
459 printk("\n");
460 spin_unlock_bh(&log_lock);
461} 462}
462 463
463static unsigned int 464static unsigned int
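
The LOG conversion above replaces many small printk() calls serialized by a private spinlock with an sbuff built via sb_open()/sb_add()/sb_close() from the new <net/netfilter/xt_log.h>: the whole log line is assembled in a private buffer and emitted in one piece, so concurrent CPUs cannot interleave fragments of each other's messages. A userspace sketch of the same buffer-then-flush pattern; the names mirror the kernel API, but this is not that implementation:

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

struct sbuff {
	char buf[1024];
	size_t count;
};

/* Append formatted text to the private buffer, clamping on overflow. */
static void sb_add(struct sbuff *m, const char *fmt, ...)
{
	size_t left = sizeof(m->buf) - m->count;
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(m->buf + m->count, left, fmt, args);
	va_end(args);
	if (n > 0)
		m->count += ((size_t)n < left) ? (size_t)n : left - 1;
}

/* Emit the accumulated line in a single write: no interleaving. */
static void sb_close(struct sbuff *m)
{
	fputs(m->buf, stdout);
	fputc('\n', stdout);
}

int main(void)
{
	struct sbuff m = { .count = 0 };

	sb_add(&m, "IN=%s OUT=%s ", "eth0", "");
	sb_add(&m, "SRC=%s DST=%s ", "2001:db8::1", "2001:db8::2");
	sb_close(&m);
	return 0;
}
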
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ff43461704be..c8af58b22562 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/icmp.h> 18#include <linux/icmp.h>
19#include <linux/sysctl.h>
20#include <net/ipv6.h> 19#include <net/ipv6.h>
21#include <net/inet_frag.h> 20#include <net/inet_frag.h>
22 21
@@ -29,6 +28,7 @@
29#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_conntrack_zones.h> 29#include <net/netfilter/nf_conntrack_zones.h>
31#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
32#include <net/netfilter/nf_log.h> 32#include <net/netfilter/nf_log.h>
33 33
34static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 34static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
@@ -189,53 +189,6 @@ out:
189 return nf_conntrack_confirm(skb); 189 return nf_conntrack_confirm(skb);
190} 190}
191 191
192static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
193 struct sk_buff *skb)
194{
195 u16 zone = NF_CT_DEFAULT_ZONE;
196
197 if (skb->nfct)
198 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
199
200#ifdef CONFIG_BRIDGE_NETFILTER
201 if (skb->nf_bridge &&
202 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
203 return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
204#endif
205 if (hooknum == NF_INET_PRE_ROUTING)
206 return IP6_DEFRAG_CONNTRACK_IN + zone;
207 else
208 return IP6_DEFRAG_CONNTRACK_OUT + zone;
209
210}
211
212static unsigned int ipv6_defrag(unsigned int hooknum,
213 struct sk_buff *skb,
214 const struct net_device *in,
215 const struct net_device *out,
216 int (*okfn)(struct sk_buff *))
217{
218 struct sk_buff *reasm;
219
220 /* Previously seen (loopback)? */
221 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
222 return NF_ACCEPT;
223
224 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
225 /* queued */
226 if (reasm == NULL)
227 return NF_STOLEN;
228
229 /* error occured or not fragmented */
 229 /* error occurred or not fragmented */
230 if (reasm == skb)
231 return NF_ACCEPT;
232
233 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
234 (struct net_device *)out, okfn);
235
236 return NF_STOLEN;
237}
238
239static unsigned int __ipv6_conntrack_in(struct net *net, 192static unsigned int __ipv6_conntrack_in(struct net *net,
240 unsigned int hooknum, 193 unsigned int hooknum,
241 struct sk_buff *skb, 194 struct sk_buff *skb,
@@ -288,13 +241,6 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
288 241
289static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 242static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
290 { 243 {
291 .hook = ipv6_defrag,
292 .owner = THIS_MODULE,
293 .pf = NFPROTO_IPV6,
294 .hooknum = NF_INET_PRE_ROUTING,
295 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
296 },
297 {
298 .hook = ipv6_conntrack_in, 244 .hook = ipv6_conntrack_in,
299 .owner = THIS_MODULE, 245 .owner = THIS_MODULE,
300 .pf = NFPROTO_IPV6, 246 .pf = NFPROTO_IPV6,
@@ -309,13 +255,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
309 .priority = NF_IP6_PRI_CONNTRACK, 255 .priority = NF_IP6_PRI_CONNTRACK,
310 }, 256 },
311 { 257 {
312 .hook = ipv6_defrag,
313 .owner = THIS_MODULE,
314 .pf = NFPROTO_IPV6,
315 .hooknum = NF_INET_LOCAL_OUT,
316 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
317 },
318 {
319 .hook = ipv6_confirm, 258 .hook = ipv6_confirm,
320 .owner = THIS_MODULE, 259 .owner = THIS_MODULE,
321 .pf = NFPROTO_IPV6, 260 .pf = NFPROTO_IPV6,
@@ -387,10 +326,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
387 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 326 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
388 .nla_policy = ipv6_nla_policy, 327 .nla_policy = ipv6_nla_policy,
389#endif 328#endif
390#ifdef CONFIG_SYSCTL
391 .ctl_table_path = nf_net_netfilter_sysctl_path,
392 .ctl_table = nf_ct_ipv6_sysctl_table,
393#endif
394 .me = THIS_MODULE, 329 .me = THIS_MODULE,
395}; 330};
396 331
@@ -403,16 +338,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
403 int ret = 0; 338 int ret = 0;
404 339
405 need_conntrack(); 340 need_conntrack();
341 nf_defrag_ipv6_enable();
406 342
407 ret = nf_ct_frag6_init();
408 if (ret < 0) {
409 pr_err("nf_conntrack_ipv6: can't initialize frag6.\n");
410 return ret;
411 }
412 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); 343 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
413 if (ret < 0) { 344 if (ret < 0) {
414 pr_err("nf_conntrack_ipv6: can't register tcp.\n"); 345 pr_err("nf_conntrack_ipv6: can't register tcp.\n");
415 goto cleanup_frag6; 346 return ret;
416 } 347 }
417 348
418 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); 349 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
@@ -450,8 +381,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
450 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); 381 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
451 cleanup_tcp: 382 cleanup_tcp:
452 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); 383 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
453 cleanup_frag6:
454 nf_ct_frag6_cleanup();
455 return ret; 384 return ret;
456} 385}
457 386
@@ -463,7 +392,6 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
463 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); 392 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
464 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); 393 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
465 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); 394 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
466 nf_ct_frag6_cleanup();
467} 395}
468 396
469module_init(nf_conntrack_l3proto_ipv6_init); 397module_init(nf_conntrack_l3proto_ipv6_init);
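
With the defrag hooks moved out (they reappear verbatim in the new nf_defrag_ipv6_hooks.c below), conntrack only has to guarantee that the nf_defrag_ipv6 module is loaded, which the call to the exported, deliberately empty nf_defrag_ipv6_enable() accomplishes: referencing the symbol creates a module dependency that modprobe resolves, and the defrag module's own module_init registers the hooks. Kernel-style fragments sketching that idiom; these are two separate modules shown together with hypothetical names, not a single buildable file:

#include <linux/module.h>
#include <linux/init.h>

/* --- provider module: the useful side effect is simply being loaded --- */
void example_defrag_enable(void)
{
	/* intentionally empty */
}
EXPORT_SYMBOL_GPL(example_defrag_enable);

/* --- consumer module: the call alone forces the dependency --- */
static int __init example_consumer_init(void)
{
	example_defrag_enable();	/* pulls the provider module in */
	return 0;
}
module_init(example_consumer_init);
MODULE_LICENSE("GPL");
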
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 578f3c1a16db..79d43aa8fa8d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
73static struct netns_frags nf_init_frags; 73static struct netns_frags nf_init_frags;
74 74
75#ifdef CONFIG_SYSCTL 75#ifdef CONFIG_SYSCTL
76struct ctl_table nf_ct_ipv6_sysctl_table[] = { 76struct ctl_table nf_ct_frag6_sysctl_table[] = {
77 { 77 {
78 .procname = "nf_conntrack_frag6_timeout", 78 .procname = "nf_conntrack_frag6_timeout",
79 .data = &nf_init_frags.timeout, 79 .data = &nf_init_frags.timeout,
@@ -97,6 +97,8 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
97 }, 97 },
98 { } 98 { }
99}; 99};
100
101static struct ctl_table_header *nf_ct_frag6_sysctl_header;
100#endif 102#endif
101 103
102static unsigned int nf_hashfn(struct inet_frag_queue *q) 104static unsigned int nf_hashfn(struct inet_frag_queue *q)
@@ -284,7 +286,7 @@ found:
284 286
285 /* Check for overlap with preceding fragment. */ 287 /* Check for overlap with preceding fragment. */
286 if (prev && 288 if (prev &&
287 (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) 289 (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
288 goto discard_fq; 290 goto discard_fq;
289 291
290 /* Look for overlap with succeeding segment. */ 292 /* Look for overlap with succeeding segment. */
@@ -363,7 +365,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
363 /* If the first fragment is fragmented itself, we split 365 /* If the first fragment is fragmented itself, we split
364 * it to two chunks: the first with data and paged part 366 * it to two chunks: the first with data and paged part
365 * and the second, holding only fragments. */ 367 * and the second, holding only fragments. */
366 if (skb_has_frags(head)) { 368 if (skb_has_frag_list(head)) {
367 struct sk_buff *clone; 369 struct sk_buff *clone;
368 int i, plen = 0; 370 int i, plen = 0;
369 371
@@ -623,11 +625,24 @@ int nf_ct_frag6_init(void)
623 inet_frags_init_net(&nf_init_frags); 625 inet_frags_init_net(&nf_init_frags);
624 inet_frags_init(&nf_frags); 626 inet_frags_init(&nf_frags);
625 627
628#ifdef CONFIG_SYSCTL
629 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
630 nf_ct_frag6_sysctl_table);
631 if (!nf_ct_frag6_sysctl_header) {
632 inet_frags_fini(&nf_frags);
633 return -ENOMEM;
634 }
635#endif
636
626 return 0; 637 return 0;
627} 638}
628 639
629void nf_ct_frag6_cleanup(void) 640void nf_ct_frag6_cleanup(void)
630{ 641{
642#ifdef CONFIG_SYSCTL
643 unregister_sysctl_table(nf_ct_frag6_sysctl_header);
644 nf_ct_frag6_sysctl_header = NULL;
645#endif
631 inet_frags_fini(&nf_frags); 646 inet_frags_fini(&nf_frags);
632 647
633 nf_init_frags.low_thresh = 0; 648 nf_init_frags.low_thresh = 0;
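
nf_ct_frag6_init() above now registers the frag6 sysctl table itself and, if registration fails, unwinds inet_frags_init() before returning -ENOMEM; nf_ct_frag6_cleanup() tears things down in the mirror order. A small runnable sketch of that acquire-in-order, release-in-reverse pattern, with hypothetical resource names standing in for inet_frags_init() and register_sysctl_paths():

#include <stdio.h>

static int  init_a(void)        { puts("A up");   return 0; }
static void fini_a(void)        { puts("A down"); }
static int  init_b(int fail)    { if (fail) return -1; puts("B up"); return 0; }
static void fini_b(void)        { puts("B down"); }

static int subsystem_init(int make_b_fail)
{
	if (init_a() < 0)
		return -1;
	if (init_b(make_b_fail) < 0) {
		fini_a();		/* roll back in reverse order */
		return -1;
	}
	return 0;
}

static void subsystem_fini(void)
{
	fini_b();			/* mirror of init order */
	fini_a();
}

int main(void)
{
	if (subsystem_init(0) == 0)
		subsystem_fini();
	return subsystem_init(1) ? 1 : 0;	/* exercises the rollback path */
}
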
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
new file mode 100644
index 000000000000..99abfb53bab9
--- /dev/null
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -0,0 +1,131 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/ipv6.h>
11#include <linux/in6.h>
12#include <linux/netfilter.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/icmp.h>
16#include <linux/sysctl.h>
17#include <net/ipv6.h>
18#include <net/inet_frag.h>
19
20#include <linux/netfilter_ipv6.h>
21#include <linux/netfilter_bridge.h>
22#include <net/netfilter/nf_conntrack.h>
23#include <net/netfilter/nf_conntrack_helper.h>
24#include <net/netfilter/nf_conntrack_l4proto.h>
25#include <net/netfilter/nf_conntrack_l3proto.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/nf_conntrack_zones.h>
28#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
29#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
30
31static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
32 struct sk_buff *skb)
33{
34 u16 zone = NF_CT_DEFAULT_ZONE;
35
36 if (skb->nfct)
37 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
38
39#ifdef CONFIG_BRIDGE_NETFILTER
40 if (skb->nf_bridge &&
41 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
42 return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
43#endif
44 if (hooknum == NF_INET_PRE_ROUTING)
45 return IP6_DEFRAG_CONNTRACK_IN + zone;
46 else
47 return IP6_DEFRAG_CONNTRACK_OUT + zone;
48
49}
50
51static unsigned int ipv6_defrag(unsigned int hooknum,
52 struct sk_buff *skb,
53 const struct net_device *in,
54 const struct net_device *out,
55 int (*okfn)(struct sk_buff *))
56{
57 struct sk_buff *reasm;
58
59 /* Previously seen (loopback)? */
60 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
61 return NF_ACCEPT;
62
63 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
64 /* queued */
65 if (reasm == NULL)
66 return NF_STOLEN;
67
68 /* error occured or not fragmented */
 68 /* error occurred or not fragmented */
69 if (reasm == skb)
70 return NF_ACCEPT;
71
72 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
73 (struct net_device *)out, okfn);
74
75 return NF_STOLEN;
76}
77
78static struct nf_hook_ops ipv6_defrag_ops[] = {
79 {
80 .hook = ipv6_defrag,
81 .owner = THIS_MODULE,
82 .pf = NFPROTO_IPV6,
83 .hooknum = NF_INET_PRE_ROUTING,
84 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
85 },
86 {
87 .hook = ipv6_defrag,
88 .owner = THIS_MODULE,
89 .pf = NFPROTO_IPV6,
90 .hooknum = NF_INET_LOCAL_OUT,
91 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
92 },
93};
94
95static int __init nf_defrag_init(void)
96{
97 int ret = 0;
98
99 ret = nf_ct_frag6_init();
100 if (ret < 0) {
101 pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
102 return ret;
103 }
104 ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
105 if (ret < 0) {
106 pr_err("nf_defrag_ipv6: can't register hooks\n");
107 goto cleanup_frag6;
108 }
109 return ret;
110
111cleanup_frag6:
112 nf_ct_frag6_cleanup();
113 return ret;
114
115}
116
117static void __exit nf_defrag_fini(void)
118{
119 nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
120 nf_ct_frag6_cleanup();
121}
122
123void nf_defrag_ipv6_enable(void)
124{
125}
126EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
127
128module_init(nf_defrag_init);
129module_exit(nf_defrag_fini);
130
131MODULE_LICENSE("GPL");
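
nf_ct6_defrag_user() above keys reassembly queues by folding the 16-bit conntrack zone into a per-direction base value by plain addition, so fragments from different zones, different directions, and the bridge path never share a queue. A sketch of that encoding; the base constants here are hypothetical, the real ones come from enum ip6_defrag_users:

#include <stdio.h>

enum {
	DEFRAG_CONNTRACK_IN    = 0x10000,	/* hypothetical bases */
	DEFRAG_CONNTRACK_OUT   = 0x20000,
	DEFRAG_CONNTRACK_BR_IN = 0x30000,
};

/* Each (direction, zone) pair maps to a distinct queue key. */
static unsigned int defrag_user(int prerouting, unsigned short zone)
{
	return (prerouting ? DEFRAG_CONNTRACK_IN
			   : DEFRAG_CONNTRACK_OUT) + zone;
}

int main(void)
{
	/* zone 7: input and output paths get different reassembly keys */
	printf("in=0x%x out=0x%x\n", defrag_user(1, 7), defrag_user(0, 7));
	return 0;
}
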
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d082eaeefa25..24b3558b8e67 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,8 @@ static const struct snmp_mib snmp6_udp6_list[] = {
126 SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), 126 SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
127 SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), 127 SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS),
128 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 128 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
129 SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
130 SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
129 SNMP_MIB_SENTINEL 131 SNMP_MIB_SENTINEL
130}; 132};
131 133
@@ -134,6 +136,8 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
134 SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), 136 SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
135 SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), 137 SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS),
136 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 138 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
139 SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
140 SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
137 SNMP_MIB_SENTINEL 141 SNMP_MIB_SENTINEL
138}; 142};
139 143
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 1fa3468f0f32..9a7978fdc02a 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,28 +25,15 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; 28const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
29static DEFINE_SPINLOCK(inet6_proto_lock);
30
31 29
32int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
33{ 31{
34 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 32 int hash = protocol & (MAX_INET_PROTOS - 1);
35
36 spin_lock_bh(&inet6_proto_lock);
37
38 if (inet6_protos[hash]) {
39 ret = -1;
40 } else {
41 inet6_protos[hash] = prot;
42 ret = 0;
43 }
44
45 spin_unlock_bh(&inet6_proto_lock);
46 33
47 return ret; 34 return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
35 NULL, prot) ? 0 : -1;
48} 36}
49
50EXPORT_SYMBOL(inet6_add_protocol); 37EXPORT_SYMBOL(inet6_add_protocol);
51 38
52/* 39/*
@@ -57,20 +44,11 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
57{ 44{
58 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 45 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
59 46
60 spin_lock_bh(&inet6_proto_lock); 47 ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash],
61 48 prot, NULL) == prot) ? 0 : -1;
62 if (inet6_protos[hash] != prot) {
63 ret = -1;
64 } else {
65 inet6_protos[hash] = NULL;
66 ret = 0;
67 }
68
69 spin_unlock_bh(&inet6_proto_lock);
70 49
71 synchronize_net(); 50 synchronize_net();
72 51
73 return ret; 52 return ret;
74} 53}
75
76EXPORT_SYMBOL(inet6_del_protocol); 54EXPORT_SYMBOL(inet6_del_protocol);
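
The protocol registration above goes lockless: a single cmpxchg() installs prot only into a NULL slot, and removal succeeds only if the slot still holds that same pointer, so concurrent add/del operations cannot race; the array is now annotated __rcu for readers. A runnable userspace sketch of the same slot protocol, using C11 atomics in place of the kernel's cmpxchg():

#include <stdatomic.h>
#include <stdio.h>

#define MAX_SLOTS 256

struct proto { const char *name; };

static _Atomic(struct proto *) slots[MAX_SLOTS];

/* Succeeds only if the slot was empty: NULL -> p. */
static int add_protocol(struct proto *p, unsigned char num)
{
	struct proto *expected = NULL;

	return atomic_compare_exchange_strong(&slots[num], &expected, p)
		? 0 : -1;	/* -1: slot already taken */
}

/* Succeeds only if the slot still holds p: p -> NULL. */
static int del_protocol(struct proto *p, unsigned char num)
{
	struct proto *expected = p;

	return atomic_compare_exchange_strong(&slots[num], &expected, NULL)
		? 0 : -1;	/* -1: someone else's protocol */
}

int main(void)
{
	struct proto udp = { "udp6" }, rogue = { "rogue" };

	printf("add udp:   %d\n", add_protocol(&udp, 17));	/* 0  */
	printf("add dup:   %d\n", add_protocol(&rogue, 17));	/* -1 */
	printf("del rogue: %d\n", del_protocol(&rogue, 17));	/* -1 */
	printf("del udp:   %d\n", del_protocol(&udp, 17));	/* 0  */
	return 0;
}
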
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e677937a07fc..86c39526ba5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -373,7 +373,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
373 373
374static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 374static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
375{ 375{
376 if ((raw6_sk(sk)->checksum || sk->sk_filter) && 376 if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) &&
377 skb_checksum_complete(skb)) { 377 skb_checksum_complete(skb)) {
378 atomic_inc(&sk->sk_drops); 378 atomic_inc(&sk->sk_drops);
379 kfree_skb(skb); 379 kfree_skb(skb);
@@ -764,7 +764,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
764 return -EINVAL; 764 return -EINVAL;
765 765
766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
767 return(-EAFNOSUPPORT); 767 return -EAFNOSUPPORT;
768 768
769 /* port is the proto value [0..255] carried in nexthdr */ 769 /* port is the proto value [0..255] carried in nexthdr */
770 proto = ntohs(sin6->sin6_port); 770 proto = ntohs(sin6->sin6_port);
@@ -772,10 +772,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
772 if (!proto) 772 if (!proto)
773 proto = inet->inet_num; 773 proto = inet->inet_num;
774 else if (proto != inet->inet_num) 774 else if (proto != inet->inet_num)
775 return(-EINVAL); 775 return -EINVAL;
776 776
777 if (proto > 255) 777 if (proto > 255)
778 return(-EINVAL); 778 return -EINVAL;
779 779
780 daddr = &sin6->sin6_addr; 780 daddr = &sin6->sin6_addr;
781 if (np->sndflow) { 781 if (np->sndflow) {
@@ -985,7 +985,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
985 /* You may get strange result with a positive odd offset; 985 /* You may get strange result with a positive odd offset;
986 RFC2292bis agrees with me. */ 986 RFC2292bis agrees with me. */
987 if (val > 0 && (val&1)) 987 if (val > 0 && (val&1))
988 return(-EINVAL); 988 return -EINVAL;
989 if (val < 0) { 989 if (val < 0) {
990 rp->checksum = 0; 990 rp->checksum = 0;
991 } else { 991 } else {
@@ -997,7 +997,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
997 break; 997 break;
998 998
999 default: 999 default:
1000 return(-ENOPROTOOPT); 1000 return -ENOPROTOOPT;
1001 } 1001 }
1002} 1002}
1003 1003
@@ -1190,7 +1190,7 @@ static int rawv6_init_sk(struct sock *sk)
1190 default: 1190 default:
1191 break; 1191 break;
1192 } 1192 }
1193 return(0); 1193 return 0;
1194} 1194}
1195 1195
1196struct proto rawv6_prot = { 1196struct proto rawv6_prot = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 64cfef1b0a4c..0f2766453759 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -349,7 +349,7 @@ found:
349 349
350 /* Check for overlap with preceding fragment. */ 350 /* Check for overlap with preceding fragment. */
351 if (prev && 351 if (prev &&
352 (FRAG6_CB(prev)->offset + prev->len) - offset > 0) 352 (FRAG6_CB(prev)->offset + prev->len) > offset)
353 goto discard_fq; 353 goto discard_fq;
354 354
355 /* Look for overlap with succeeding segment. */ 355 /* Look for overlap with succeeding segment. */
@@ -458,7 +458,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
458 /* If the first fragment is fragmented itself, we split 458 /* If the first fragment is fragmented itself, we split
459 * it to two chunks: the first with data and paged part 459 * it to two chunks: the first with data and paged part
460 * and the second, holding only fragments. */ 460 * and the second, holding only fragments. */
461 if (skb_has_frags(head)) { 461 if (skb_has_frag_list(head)) {
462 struct sk_buff *clone; 462 struct sk_buff *clone;
463 int i, plen = 0; 463 int i, plen = 0;
464 464
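
Both reassembly copies of the overlap check change from testing a signed difference, (prev->offset + prev->len) - offset > 0, to comparing the bounds directly, prev_end > offset. For in-range fragment offsets the two forms are arithmetically equivalent; the direct comparison simply states the interval test as written, without leaning on signed subtraction. A tiny sketch of the test:

#include <stdio.h>

/* Fragment A = [prev_offset, prev_end) overlaps a fragment starting at
 * offset iff prev_end > offset, given that the sorted fragment list
 * guarantees prev_offset <= offset. Same test as the patch above.
 */
static int overlaps(int prev_offset, int prev_len, int offset)
{
	int prev_end = prev_offset + prev_len;

	return prev_end > offset;
}

int main(void)
{
	printf("%d\n", overlaps(0, 8, 8));	/* 0: touching, no overlap */
	printf("%d\n", overlaps(0, 16, 8));	/* 1: overlaps by 8 bytes  */
	return 0;
}
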
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a275c6e1e25c..96455ffb76fb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = {
109 .link_failure = ip6_link_failure, 109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu, 110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = __ip6_local_out, 111 .local_out = __ip6_local_out,
112 .entries = ATOMIC_INIT(0),
113}; 112};
114 113
115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = {
122 .destroy = ip6_dst_destroy, 121 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check, 122 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entries = ATOMIC_INIT(0),
126}; 124};
127 125
128static struct rt6_info ip6_null_entry_template = { 126static struct rt6_info ip6_null_entry_template = {
@@ -217,14 +215,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
217 215
218static __inline__ int rt6_check_expired(const struct rt6_info *rt) 216static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{ 217{
220 return (rt->rt6i_flags & RTF_EXPIRES && 218 return (rt->rt6i_flags & RTF_EXPIRES) &&
221 time_after(jiffies, rt->rt6i_expires)); 219 time_after(jiffies, rt->rt6i_expires);
222} 220}
223 221
224static inline int rt6_need_strict(struct in6_addr *daddr) 222static inline int rt6_need_strict(struct in6_addr *daddr)
225{ 223{
226 return (ipv6_addr_type(daddr) & 224 return ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); 225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
228} 226}
229 227
230/* 228/*
@@ -440,7 +438,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
440 __func__, match); 438 __func__, match);
441 439
442 net = dev_net(rt0->rt6i_dev); 440 net = dev_net(rt0->rt6i_dev);
443 return (match ? match : net->ipv6.ip6_null_entry); 441 return match ? match : net->ipv6.ip6_null_entry;
444} 442}
445 443
446#ifdef CONFIG_IPV6_ROUTE_INFO 444#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -859,7 +857,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
859 857
860 dst_release(*dstp); 858 dst_release(*dstp);
861 *dstp = new; 859 *dstp = new;
862 return (new ? 0 : -ENOMEM); 860 return new ? 0 : -ENOMEM;
863} 861}
864EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 862EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
865 863
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops)
1058 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1059 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1060 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1059 int entries;
1061 1060
1061 entries = dst_entries_get_fast(ops);
1062 if (time_after(rt_last_gc + rt_min_interval, now) && 1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 atomic_read(&ops->entries) <= rt_max_size) 1063 entries <= rt_max_size)
1064 goto out; 1064 goto out;
1065 1065
1066 net->ipv6.ip6_rt_gc_expire++; 1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now; 1068 net->ipv6.ip6_rt_last_gc = now;
1069 if (atomic_read(&ops->entries) < ops->gc_thresh) 1069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1071out: 1072out:
1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1073 return (atomic_read(&ops->entries) > rt_max_size); 1074 return entries > rt_max_size;
1074} 1075}
1075 1076
1076/* Clean host part of a prefix. Not necessary in radix tree, 1077/* Clean host part of a prefix. Not necessary in radix tree,
@@ -1169,6 +1170,8 @@ int ip6_route_add(struct fib6_config *cfg)
1169 1170
1170 if (addr_type & IPV6_ADDR_MULTICAST) 1171 if (addr_type & IPV6_ADDR_MULTICAST)
1171 rt->dst.input = ip6_mc_input; 1172 rt->dst.input = ip6_mc_input;
1173 else if (cfg->fc_flags & RTF_LOCAL)
1174 rt->dst.input = ip6_input;
1172 else 1175 else
1173 rt->dst.input = ip6_forward; 1176 rt->dst.input = ip6_forward;
1174 1177
@@ -1190,7 +1193,8 @@ int ip6_route_add(struct fib6_config *cfg)
1190 they would result in kernel looping; promote them to reject routes 1193 they would result in kernel looping; promote them to reject routes
1191 */ 1194 */
1192 if ((cfg->fc_flags & RTF_REJECT) || 1195 if ((cfg->fc_flags & RTF_REJECT) ||
1193 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1197 && !(cfg->fc_flags&RTF_LOCAL))) {
1194 /* hold loopback dev/idev if we haven't done so. */ 1198 /* hold loopback dev/idev if we haven't done so. */
1195 if (dev != net->loopback_dev) { 1199 if (dev != net->loopback_dev) {
1196 if (dev) { 1200 if (dev) {
@@ -1941,8 +1945,12 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1941 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1945 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1942 struct neighbour *neigh; 1946 struct neighbour *neigh;
1943 1947
1944 if (rt == NULL) 1948 if (rt == NULL) {
1949 if (net_ratelimit())
1950 pr_warning("IPv6: Maximum number of routes reached,"
1951 " consider increasing route/max_size.\n");
1945 return ERR_PTR(-ENOMEM); 1952 return ERR_PTR(-ENOMEM);
1953 }
1946 1954
1947 dev_hold(net->loopback_dev); 1955 dev_hold(net->loopback_dev);
1948 in6_dev_hold(idev); 1956 in6_dev_hold(idev);
@@ -2102,6 +2110,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2102 if (rtm->rtm_type == RTN_UNREACHABLE) 2110 if (rtm->rtm_type == RTN_UNREACHABLE)
2103 cfg->fc_flags |= RTF_REJECT; 2111 cfg->fc_flags |= RTF_REJECT;
2104 2112
2113 if (rtm->rtm_type == RTN_LOCAL)
2114 cfg->fc_flags |= RTF_LOCAL;
2115
2105 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2116 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2106 cfg->fc_nlinfo.nlh = nlh; 2117 cfg->fc_nlinfo.nlh = nlh;
2107 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2118 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2222,6 +2233,8 @@ static int rt6_fill_node(struct net *net,
2222 NLA_PUT_U32(skb, RTA_TABLE, table); 2233 NLA_PUT_U32(skb, RTA_TABLE, table);
2223 if (rt->rt6i_flags&RTF_REJECT) 2234 if (rt->rt6i_flags&RTF_REJECT)
2224 rtm->rtm_type = RTN_UNREACHABLE; 2235 rtm->rtm_type = RTN_UNREACHABLE;
2236 else if (rt->rt6i_flags&RTF_LOCAL)
2237 rtm->rtm_type = RTN_LOCAL;
2225 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2238 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2226 rtm->rtm_type = RTN_LOCAL; 2239 rtm->rtm_type = RTN_LOCAL;
2227 else 2240 else
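Taken together, the RTF_LOCAL hunks above add a complete user-visible feature: RTN_LOCAL from netlink maps onto the new RTF_LOCAL flag, such routes are wired to ip6_input instead of ip6_forward (and are no longer promoted to reject routes merely because they sit on the loopback device), and route dumps report them as RTN_LOCAL again. With iproute2 this enables, for example (address from the documentation prefix, purely illustrative):

	ip -6 route add local 2001:db8::1/128 dev lo
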
@@ -2516,7 +2529,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2516 net->ipv6.rt6_stats->fib_rt_alloc, 2529 net->ipv6.rt6_stats->fib_rt_alloc,
2517 net->ipv6.rt6_stats->fib_rt_entries, 2530 net->ipv6.rt6_stats->fib_rt_entries,
2518 net->ipv6.rt6_stats->fib_rt_cache, 2531 net->ipv6.rt6_stats->fib_rt_cache,
2519 atomic_read(&net->ipv6.ip6_dst_ops.entries), 2532 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2520 net->ipv6.rt6_stats->fib_discarded_routes); 2533 net->ipv6.rt6_stats->fib_discarded_routes);
2521 2534
2522 return 0; 2535 return 0;
@@ -2658,11 +2671,14 @@ static int __net_init ip6_route_net_init(struct net *net)
2658 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2671 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2659 sizeof(net->ipv6.ip6_dst_ops)); 2672 sizeof(net->ipv6.ip6_dst_ops));
2660 2673
2674 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2675 goto out_ip6_dst_ops;
2676
2661 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2677 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2662 sizeof(*net->ipv6.ip6_null_entry), 2678 sizeof(*net->ipv6.ip6_null_entry),
2663 GFP_KERNEL); 2679 GFP_KERNEL);
2664 if (!net->ipv6.ip6_null_entry) 2680 if (!net->ipv6.ip6_null_entry)
2665 goto out_ip6_dst_ops; 2681 goto out_ip6_dst_entries;
2666 net->ipv6.ip6_null_entry->dst.path = 2682 net->ipv6.ip6_null_entry->dst.path =
2667 (struct dst_entry *)net->ipv6.ip6_null_entry; 2683 (struct dst_entry *)net->ipv6.ip6_null_entry;
2668 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2684 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
@@ -2712,6 +2728,8 @@ out_ip6_prohibit_entry:
2712out_ip6_null_entry: 2728out_ip6_null_entry:
2713 kfree(net->ipv6.ip6_null_entry); 2729 kfree(net->ipv6.ip6_null_entry);
2714#endif 2730#endif
2731out_ip6_dst_entries:
2732 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2715out_ip6_dst_ops: 2733out_ip6_dst_ops:
2716 goto out; 2734 goto out;
2717} 2735}
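The new out_ip6_dst_entries label keeps the unwind an exact mirror of the init order. dst_entries_init() and dst_entries_destroy() are assumed to be the allocation/teardown pair for the same percpu counter sketched earlier, roughly:

	/* Sketch under the same pcpuc_entries assumption as above;
	 * percpu_counter_init() can fail, hence the < 0 check at init. */
	static inline int dst_entries_init(struct dst_ops *dst)
	{
		return percpu_counter_init(&dst->pcpuc_entries, 0);
	}

	static inline void dst_entries_destroy(struct dst_ops *dst)
	{
		percpu_counter_destroy(&dst->pcpuc_entries);
	}
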
@@ -2727,6 +2745,7 @@ static void __net_exit ip6_route_net_exit(struct net *net)
2727 kfree(net->ipv6.ip6_prohibit_entry); 2745 kfree(net->ipv6.ip6_prohibit_entry);
2728 kfree(net->ipv6.ip6_blk_hole_entry); 2746 kfree(net->ipv6.ip6_blk_hole_entry);
2729#endif 2747#endif
2748 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2730} 2749}
2731 2750
2732static struct pernet_operations ip6_route_net_ops = { 2751static struct pernet_operations ip6_route_net_ops = {
@@ -2750,10 +2769,14 @@ int __init ip6_route_init(void)
2750 if (!ip6_dst_ops_template.kmem_cachep) 2769 if (!ip6_dst_ops_template.kmem_cachep)
2751 goto out; 2770 goto out;
2752 2771
2753 ret = register_pernet_subsys(&ip6_route_net_ops); 2772 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2754 if (ret) 2773 if (ret)
2755 goto out_kmem_cache; 2774 goto out_kmem_cache;
2756 2775
2776 ret = register_pernet_subsys(&ip6_route_net_ops);
2777 if (ret)
2778 goto out_dst_entries;
2779
2757 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2780 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2758 2781
2759 /* Registering of the loopback is done before this portion of code, 2782 /* Registering of the loopback is done before this portion of code,
@@ -2800,6 +2823,8 @@ out_fib6_init:
2800 fib6_gc_cleanup(); 2823 fib6_gc_cleanup();
2801out_register_subsys: 2824out_register_subsys:
2802 unregister_pernet_subsys(&ip6_route_net_ops); 2825 unregister_pernet_subsys(&ip6_route_net_ops);
2826out_dst_entries:
2827 dst_entries_destroy(&ip6_dst_blackhole_ops);
2803out_kmem_cache: 2828out_kmem_cache:
2804 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2829 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2805 goto out; 2830 goto out;
@@ -2812,5 +2837,6 @@ void ip6_route_cleanup(void)
2812 xfrm6_fini(); 2837 xfrm6_fini();
2813 fib6_gc_cleanup(); 2838 fib6_gc_cleanup();
2814 unregister_pernet_subsys(&ip6_route_net_ops); 2839 unregister_pernet_subsys(&ip6_route_net_ops);
2840 dst_entries_destroy(&ip6_dst_blackhole_ops);
2815 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2841 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2816} 2842}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4699cd3c3118..d6bfaec3bbbf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -63,36 +63,63 @@
63#define HASH_SIZE 16 63#define HASH_SIZE 16
64#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 64#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
65 65
66static void ipip6_tunnel_init(struct net_device *dev); 66static int ipip6_tunnel_init(struct net_device *dev);
67static void ipip6_tunnel_setup(struct net_device *dev); 67static void ipip6_tunnel_setup(struct net_device *dev);
68static void ipip6_dev_free(struct net_device *dev);
68 69
69static int sit_net_id __read_mostly; 70static int sit_net_id __read_mostly;
70struct sit_net { 71struct sit_net {
71 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 72 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
72 struct ip_tunnel *tunnels_r[HASH_SIZE]; 73 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
73 struct ip_tunnel *tunnels_l[HASH_SIZE]; 74 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
74 struct ip_tunnel *tunnels_wc[1]; 75 struct ip_tunnel __rcu *tunnels_wc[1];
75 struct ip_tunnel **tunnels[4]; 76 struct ip_tunnel __rcu **tunnels[4];
76 77
77 struct net_device *fb_tunnel_dev; 78 struct net_device *fb_tunnel_dev;
78}; 79};
79 80
80/* 81/*
81 * Locking : hash tables are protected by RCU and a spinlock 82 * Locking : hash tables are protected by RCU and RTNL
82 */ 83 */
83static DEFINE_SPINLOCK(ipip6_lock);
84 84
85#define for_each_ip_tunnel_rcu(start) \ 85#define for_each_ip_tunnel_rcu(start) \
86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
87 87
88/* often modified stats are per cpu, other are shared (netdev->stats) */
89struct pcpu_tstats {
90 unsigned long rx_packets;
91 unsigned long rx_bytes;
92 unsigned long tx_packets;
93 unsigned long tx_bytes;
94};
95
96static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
97{
98 struct pcpu_tstats sum = { 0 };
99 int i;
100
101 for_each_possible_cpu(i) {
102 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
103
104 sum.rx_packets += tstats->rx_packets;
105 sum.rx_bytes += tstats->rx_bytes;
106 sum.tx_packets += tstats->tx_packets;
107 sum.tx_bytes += tstats->tx_bytes;
108 }
109 dev->stats.rx_packets = sum.rx_packets;
110 dev->stats.rx_bytes = sum.rx_bytes;
111 dev->stats.tx_packets = sum.tx_packets;
112 dev->stats.tx_bytes = sum.tx_bytes;
113 return &dev->stats;
114}
88/* 115/*
89 * Must be invoked with rcu_read_lock 116 * Must be invoked with rcu_read_lock
90 */ 117 */
91static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, 118static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
92 struct net_device *dev, __be32 remote, __be32 local) 119 struct net_device *dev, __be32 remote, __be32 local)
93{ 120{
94 unsigned h0 = HASH(remote); 121 unsigned int h0 = HASH(remote);
95 unsigned h1 = HASH(local); 122 unsigned int h1 = HASH(local);
96 struct ip_tunnel *t; 123 struct ip_tunnel *t;
97 struct sit_net *sitn = net_generic(net, sit_net_id); 124 struct sit_net *sitn = net_generic(net, sit_net_id);
98 125
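The pcpu_tstats conversion at the top of this file trades one shared set of counters (with the cache-line bouncing that implies on SMP) for per-cpu ones that the hot paths bump locklessly; ipip6_get_stats() folds them back into dev->stats only when someone asks. The update side, which the rx and tx hunks below use, is simply:

	/* Lockless per-cpu update; the rx/tx paths already run with
	 * preemption disabled, so this_cpu_ptr() is safe here. */
	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);

	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;

A fold in ndo_get_stats can race with a concurrent increment, so a read may be momentarily stale; that is the usual, accepted trade-off for statistics.
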
@@ -121,12 +148,12 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
121 return NULL; 148 return NULL;
122} 149}
123 150
124static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, 151static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
125 struct ip_tunnel_parm *parms) 152 struct ip_tunnel_parm *parms)
126{ 153{
127 __be32 remote = parms->iph.daddr; 154 __be32 remote = parms->iph.daddr;
128 __be32 local = parms->iph.saddr; 155 __be32 local = parms->iph.saddr;
129 unsigned h = 0; 156 unsigned int h = 0;
130 int prio = 0; 157 int prio = 0;
131 158
132 if (remote) { 159 if (remote) {
@@ -140,7 +167,7 @@ static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
140 return &sitn->tunnels[prio][h]; 167 return &sitn->tunnels[prio][h];
141} 168}
142 169
143static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, 170static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
144 struct ip_tunnel *t) 171 struct ip_tunnel *t)
145{ 172{
146 return __ipip6_bucket(sitn, &t->parms); 173 return __ipip6_bucket(sitn, &t->parms);
@@ -148,13 +175,14 @@ static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
148 175
149static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) 176static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
150{ 177{
151 struct ip_tunnel **tp; 178 struct ip_tunnel __rcu **tp;
152 179 struct ip_tunnel *iter;
153 for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { 180
154 if (t == *tp) { 181 for (tp = ipip6_bucket(sitn, t);
155 spin_lock_bh(&ipip6_lock); 182 (iter = rtnl_dereference(*tp)) != NULL;
156 *tp = t->next; 183 tp = &iter->next) {
157 spin_unlock_bh(&ipip6_lock); 184 if (t == iter) {
185 rcu_assign_pointer(*tp, t->next);
158 break; 186 break;
159 } 187 }
160 } 188 }
@@ -162,12 +190,10 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
162 190
163static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) 191static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
164{ 192{
165 struct ip_tunnel **tp = ipip6_bucket(sitn, t); 193 struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
166 194
167 spin_lock_bh(&ipip6_lock); 195 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
168 t->next = *tp;
169 rcu_assign_pointer(*tp, t); 196 rcu_assign_pointer(*tp, t);
170 spin_unlock_bh(&ipip6_lock);
171} 197}
172 198
173static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) 199static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -187,17 +213,20 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
187#endif 213#endif
188} 214}
189 215
190static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, 216static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
191 struct ip_tunnel_parm *parms, int create) 217 struct ip_tunnel_parm *parms, int create)
192{ 218{
193 __be32 remote = parms->iph.daddr; 219 __be32 remote = parms->iph.daddr;
194 __be32 local = parms->iph.saddr; 220 __be32 local = parms->iph.saddr;
195 struct ip_tunnel *t, **tp, *nt; 221 struct ip_tunnel *t, *nt;
222 struct ip_tunnel __rcu **tp;
196 struct net_device *dev; 223 struct net_device *dev;
197 char name[IFNAMSIZ]; 224 char name[IFNAMSIZ];
198 struct sit_net *sitn = net_generic(net, sit_net_id); 225 struct sit_net *sitn = net_generic(net, sit_net_id);
199 226
200 for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { 227 for (tp = __ipip6_bucket(sitn, parms);
228 (t = rtnl_dereference(*tp)) != NULL;
229 tp = &t->next) {
201 if (local == t->parms.iph.saddr && 230 if (local == t->parms.iph.saddr &&
202 remote == t->parms.iph.daddr && 231 remote == t->parms.iph.daddr &&
203 parms->link == t->parms.link) { 232 parms->link == t->parms.link) {
@@ -213,7 +242,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
213 if (parms->name[0]) 242 if (parms->name[0])
214 strlcpy(name, parms->name, IFNAMSIZ); 243 strlcpy(name, parms->name, IFNAMSIZ);
215 else 244 else
216 sprintf(name, "sit%%d"); 245 strcpy(name, "sit%d");
217 246
218 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); 247 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
219 if (dev == NULL) 248 if (dev == NULL)
@@ -229,7 +258,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
229 nt = netdev_priv(dev); 258 nt = netdev_priv(dev);
230 259
231 nt->parms = *parms; 260 nt->parms = *parms;
232 ipip6_tunnel_init(dev); 261 if (ipip6_tunnel_init(dev) < 0)
262 goto failed_free;
233 ipip6_tunnel_clone_6rd(dev, sitn); 263 ipip6_tunnel_clone_6rd(dev, sitn);
234 264
235 if (parms->i_flags & SIT_ISATAP) 265 if (parms->i_flags & SIT_ISATAP)
@@ -244,7 +274,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
244 return nt; 274 return nt;
245 275
246failed_free: 276failed_free:
247 free_netdev(dev); 277 ipip6_dev_free(dev);
248failed: 278failed:
249 return NULL; 279 return NULL;
250} 280}
@@ -340,7 +370,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
340 370
341 ASSERT_RTNL(); 371 ASSERT_RTNL();
342 372
343 for (p = t->prl; p; p = p->next) { 373 for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
344 if (p->addr == a->addr) { 374 if (p->addr == a->addr) {
345 if (chg) { 375 if (chg) {
346 p->flags = a->flags; 376 p->flags = a->flags;
@@ -451,15 +481,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
451 struct sit_net *sitn = net_generic(net, sit_net_id); 481 struct sit_net *sitn = net_generic(net, sit_net_id);
452 482
453 if (dev == sitn->fb_tunnel_dev) { 483 if (dev == sitn->fb_tunnel_dev) {
454 spin_lock_bh(&ipip6_lock); 484 rcu_assign_pointer(sitn->tunnels_wc[0], NULL);
455 sitn->tunnels_wc[0] = NULL;
456 spin_unlock_bh(&ipip6_lock);
457 dev_put(dev);
458 } else { 485 } else {
459 ipip6_tunnel_unlink(sitn, netdev_priv(dev)); 486 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
460 ipip6_tunnel_del_prl(netdev_priv(dev), NULL); 487 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
461 dev_put(dev);
462 } 488 }
489 dev_put(dev);
463} 490}
464 491
465 492
@@ -548,6 +575,8 @@ static int ipip6_rcv(struct sk_buff *skb)
548 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 575 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
549 iph->saddr, iph->daddr); 576 iph->saddr, iph->daddr);
550 if (tunnel != NULL) { 577 if (tunnel != NULL) {
578 struct pcpu_tstats *tstats;
579
551 secpath_reset(skb); 580 secpath_reset(skb);
552 skb->mac_header = skb->network_header; 581 skb->mac_header = skb->network_header;
553 skb_reset_network_header(skb); 582 skb_reset_network_header(skb);
@@ -563,10 +592,16 @@ static int ipip6_rcv(struct sk_buff *skb)
563 return 0; 592 return 0;
564 } 593 }
565 594
566 skb_tunnel_rx(skb, tunnel->dev); 595 tstats = this_cpu_ptr(tunnel->dev->tstats);
596 tstats->rx_packets++;
597 tstats->rx_bytes += skb->len;
598
599 __skb_tunnel_rx(skb, tunnel->dev);
567 600
568 ipip6_ecn_decapsulate(iph, skb); 601 ipip6_ecn_decapsulate(iph, skb);
602
569 netif_rx(skb); 603 netif_rx(skb);
604
570 rcu_read_unlock(); 605 rcu_read_unlock();
571 return 0; 606 return 0;
572 } 607 }
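The switch from skb_tunnel_rx() to __skb_tunnel_rx() belongs with the stats change: the non-underscore helper is assumed to bump the shared dev->stats counters itself before doing the real work, which would both double-count and write the very counters this patch just moved away from. Its assumed shape (from include/net/dst.h of this era):

	static inline void skb_tunnel_rx(struct sk_buff *skb,
					 struct net_device *dev)
	{
		dev->stats.rx_packets++;	/* shared counters */
		dev->stats.rx_bytes += skb->len;
		__skb_tunnel_rx(skb, dev);
	}
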
@@ -590,7 +625,7 @@ __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
590#ifdef CONFIG_IPV6_SIT_6RD 625#ifdef CONFIG_IPV6_SIT_6RD
591 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, 626 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
592 tunnel->ip6rd.prefixlen)) { 627 tunnel->ip6rd.prefixlen)) {
593 unsigned pbw0, pbi0; 628 unsigned int pbw0, pbi0;
594 int pbi1; 629 int pbi1;
595 u32 d; 630 u32 d;
596 631
@@ -625,14 +660,13 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
625 struct net_device *dev) 660 struct net_device *dev)
626{ 661{
627 struct ip_tunnel *tunnel = netdev_priv(dev); 662 struct ip_tunnel *tunnel = netdev_priv(dev);
628 struct net_device_stats *stats = &dev->stats; 663 struct pcpu_tstats *tstats;
629 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
630 struct iphdr *tiph = &tunnel->parms.iph; 664 struct iphdr *tiph = &tunnel->parms.iph;
631 struct ipv6hdr *iph6 = ipv6_hdr(skb); 665 struct ipv6hdr *iph6 = ipv6_hdr(skb);
632 u8 tos = tunnel->parms.iph.tos; 666 u8 tos = tunnel->parms.iph.tos;
633 __be16 df = tiph->frag_off; 667 __be16 df = tiph->frag_off;
634 struct rtable *rt; /* Route to the other host */ 668 struct rtable *rt; /* Route to the other host */
635 struct net_device *tdev; /* Device to other host */ 669 struct net_device *tdev; /* Device to other host */
636 struct iphdr *iph; /* Our new IP header */ 670 struct iphdr *iph; /* Our new IP header */
637 unsigned int max_headroom; /* The extra header space needed */ 671 unsigned int max_headroom; /* The extra header space needed */
638 __be32 dst = tiph->daddr; 672 __be32 dst = tiph->daddr;
@@ -703,20 +737,20 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
703 .oif = tunnel->parms.link, 737 .oif = tunnel->parms.link,
704 .proto = IPPROTO_IPV6 }; 738 .proto = IPPROTO_IPV6 };
705 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 739 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
706 stats->tx_carrier_errors++; 740 dev->stats.tx_carrier_errors++;
707 goto tx_error_icmp; 741 goto tx_error_icmp;
708 } 742 }
709 } 743 }
710 if (rt->rt_type != RTN_UNICAST) { 744 if (rt->rt_type != RTN_UNICAST) {
711 ip_rt_put(rt); 745 ip_rt_put(rt);
712 stats->tx_carrier_errors++; 746 dev->stats.tx_carrier_errors++;
713 goto tx_error_icmp; 747 goto tx_error_icmp;
714 } 748 }
715 tdev = rt->dst.dev; 749 tdev = rt->dst.dev;
716 750
717 if (tdev == dev) { 751 if (tdev == dev) {
718 ip_rt_put(rt); 752 ip_rt_put(rt);
719 stats->collisions++; 753 dev->stats.collisions++;
720 goto tx_error; 754 goto tx_error;
721 } 755 }
722 756
@@ -724,7 +758,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
724 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 758 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
725 759
726 if (mtu < 68) { 760 if (mtu < 68) {
727 stats->collisions++; 761 dev->stats.collisions++;
728 ip_rt_put(rt); 762 ip_rt_put(rt);
729 goto tx_error; 763 goto tx_error;
730 } 764 }
@@ -763,7 +797,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
763 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 797 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
764 if (!new_skb) { 798 if (!new_skb) {
765 ip_rt_put(rt); 799 ip_rt_put(rt);
766 txq->tx_dropped++; 800 dev->stats.tx_dropped++;
767 dev_kfree_skb(skb); 801 dev_kfree_skb(skb);
768 return NETDEV_TX_OK; 802 return NETDEV_TX_OK;
769 } 803 }
@@ -799,14 +833,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
799 iph->ttl = iph6->hop_limit; 833 iph->ttl = iph6->hop_limit;
800 834
801 nf_reset(skb); 835 nf_reset(skb);
802 836 tstats = this_cpu_ptr(dev->tstats);
803 IPTUNNEL_XMIT(); 837 __IPTUNNEL_XMIT(tstats, &dev->stats);
804 return NETDEV_TX_OK; 838 return NETDEV_TX_OK;
805 839
806tx_error_icmp: 840tx_error_icmp:
807 dst_link_failure(skb); 841 dst_link_failure(skb);
808tx_error: 842tx_error:
809 stats->tx_errors++; 843 dev->stats.tx_errors++;
810 dev_kfree_skb(skb); 844 dev_kfree_skb(skb);
811 return NETDEV_TX_OK; 845 return NETDEV_TX_OK;
812} 846}
@@ -929,6 +963,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
929 } 963 }
930 t = netdev_priv(dev); 964 t = netdev_priv(dev);
931 ipip6_tunnel_unlink(sitn, t); 965 ipip6_tunnel_unlink(sitn, t);
966 synchronize_net();
932 t->parms.iph.saddr = p.iph.saddr; 967 t->parms.iph.saddr = p.iph.saddr;
933 t->parms.iph.daddr = p.iph.daddr; 968 t->parms.iph.daddr = p.iph.daddr;
934 memcpy(dev->dev_addr, &p.iph.saddr, 4); 969 memcpy(dev->dev_addr, &p.iph.saddr, 4);
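The synchronize_net() added above is the standard RCU update recipe for rekeying an object that lookups may still be traversing: unlink it, wait for every reader that could still hold it under rcu_read_lock() to finish, and only then rewrite the fields the hash lookup keys on, before the tunnel is re-linked further down the same function. In outline:

	ipip6_tunnel_unlink(sitn, t);		/* no new readers can find t */
	synchronize_net();			/* existing readers drain    */
	t->parms.iph.saddr = p.iph.saddr;	/* safe to change keys now   */
	t->parms.iph.daddr = p.iph.daddr;
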
@@ -1083,12 +1118,19 @@ static const struct net_device_ops ipip6_netdev_ops = {
1083 .ndo_start_xmit = ipip6_tunnel_xmit, 1118 .ndo_start_xmit = ipip6_tunnel_xmit,
1084 .ndo_do_ioctl = ipip6_tunnel_ioctl, 1119 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1085 .ndo_change_mtu = ipip6_tunnel_change_mtu, 1120 .ndo_change_mtu = ipip6_tunnel_change_mtu,
1121 .ndo_get_stats = ipip6_get_stats,
1086}; 1122};
1087 1123
1124static void ipip6_dev_free(struct net_device *dev)
1125{
1126 free_percpu(dev->tstats);
1127 free_netdev(dev);
1128}
1129
1088static void ipip6_tunnel_setup(struct net_device *dev) 1130static void ipip6_tunnel_setup(struct net_device *dev)
1089{ 1131{
1090 dev->netdev_ops = &ipip6_netdev_ops; 1132 dev->netdev_ops = &ipip6_netdev_ops;
1091 dev->destructor = free_netdev; 1133 dev->destructor = ipip6_dev_free;
1092 1134
1093 dev->type = ARPHRD_SIT; 1135 dev->type = ARPHRD_SIT;
1094 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 1136 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -1098,9 +1140,10 @@ static void ipip6_tunnel_setup(struct net_device *dev)
1098 dev->iflink = 0; 1140 dev->iflink = 0;
1099 dev->addr_len = 4; 1141 dev->addr_len = 4;
1100 dev->features |= NETIF_F_NETNS_LOCAL; 1142 dev->features |= NETIF_F_NETNS_LOCAL;
1143 dev->features |= NETIF_F_LLTX;
1101} 1144}
1102 1145
1103static void ipip6_tunnel_init(struct net_device *dev) 1146static int ipip6_tunnel_init(struct net_device *dev)
1104{ 1147{
1105 struct ip_tunnel *tunnel = netdev_priv(dev); 1148 struct ip_tunnel *tunnel = netdev_priv(dev);
1106 1149
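NETIF_F_LLTX above is a direct consequence of the per-cpu stats: it tells the core not to take the device transmit lock around ndo_start_xmit(), which is only safe once concurrent transmits on different CPUs no longer update shared counters on every packet. The rare error counters stay in dev->stats (see the xmit hunks earlier), where an occasional racy increment is tolerated.
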
@@ -1111,9 +1154,14 @@ static void ipip6_tunnel_init(struct net_device *dev)
1111 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 1154 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1112 1155
1113 ipip6_tunnel_bind_dev(dev); 1156 ipip6_tunnel_bind_dev(dev);
1157 dev->tstats = alloc_percpu(struct pcpu_tstats);
1158 if (!dev->tstats)
1159 return -ENOMEM;
1160
1161 return 0;
1114} 1162}
1115 1163
1116static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) 1164static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1117{ 1165{
1118 struct ip_tunnel *tunnel = netdev_priv(dev); 1166 struct ip_tunnel *tunnel = netdev_priv(dev);
1119 struct iphdr *iph = &tunnel->parms.iph; 1167 struct iphdr *iph = &tunnel->parms.iph;
@@ -1128,11 +1176,15 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1128 iph->ihl = 5; 1176 iph->ihl = 5;
1129 iph->ttl = 64; 1177 iph->ttl = 64;
1130 1178
1179 dev->tstats = alloc_percpu(struct pcpu_tstats);
1180 if (!dev->tstats)
1181 return -ENOMEM;
1131 dev_hold(dev); 1182 dev_hold(dev);
1132 sitn->tunnels_wc[0] = tunnel; 1183 sitn->tunnels_wc[0] = tunnel;
1184 return 0;
1133} 1185}
1134 1186
1135static struct xfrm_tunnel sit_handler = { 1187static struct xfrm_tunnel sit_handler __read_mostly = {
1136 .handler = ipip6_rcv, 1188 .handler = ipip6_rcv,
1137 .err_handler = ipip6_err, 1189 .err_handler = ipip6_err,
1138 .priority = 1, 1190 .priority = 1,
@@ -1173,7 +1225,10 @@ static int __net_init sit_init_net(struct net *net)
1173 } 1225 }
1174 dev_net_set(sitn->fb_tunnel_dev, net); 1226 dev_net_set(sitn->fb_tunnel_dev, net);
1175 1227
1176 ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); 1228 err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1229 if (err)
1230 goto err_dev_free;
1231
1177 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); 1232 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1178 1233
1179 if ((err = register_netdev(sitn->fb_tunnel_dev))) 1234 if ((err = register_netdev(sitn->fb_tunnel_dev)))
@@ -1183,7 +1238,8 @@ static int __net_init sit_init_net(struct net *net)
1183 1238
1184err_reg_dev: 1239err_reg_dev:
1185 dev_put(sitn->fb_tunnel_dev); 1240 dev_put(sitn->fb_tunnel_dev);
1186 free_netdev(sitn->fb_tunnel_dev); 1241err_dev_free:
1242 ipip6_dev_free(sitn->fb_tunnel_dev);
1187err_alloc_dev: 1243err_alloc_dev:
1188 return err; 1244 return err;
1189} 1245}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fe6d40418c0b..7e41e2cbb85e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -139,7 +139,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
139 return -EINVAL; 139 return -EINVAL;
140 140
141 if (usin->sin6_family != AF_INET6) 141 if (usin->sin6_family != AF_INET6)
142 return(-EAFNOSUPPORT); 142 return -EAFNOSUPPORT;
143 143
144 memset(&fl, 0, sizeof(fl)); 144 memset(&fl, 0, sizeof(fl));
145 145
@@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1409 1409
1410 newsk = tcp_create_openreq_child(sk, req, skb); 1410 newsk = tcp_create_openreq_child(sk, req, skb);
1411 if (newsk == NULL) 1411 if (newsk == NULL)
1412 goto out; 1412 goto out_nonewsk;
1413 1413
1414 /* 1414 /*
1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1497 } 1497 }
1498#endif 1498#endif
1499 1499
1500 if (__inet_inherit_port(sk, newsk) < 0) {
1501 sock_put(newsk);
1502 goto out;
1503 }
1500 __inet6_hash(newsk, NULL); 1504 __inet6_hash(newsk, NULL);
1501 __inet_inherit_port(sk, newsk);
1502 1505
1503 return newsk; 1506 return newsk;
1504 1507
1505out_overflow: 1508out_overflow:
1506 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1509 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1507out: 1510out_nonewsk:
1508 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1509 if (opt && opt != np->opt) 1511 if (opt && opt != np->opt)
1510 sock_kfree_s(sk, opt, opt->tot_len); 1512 sock_kfree_s(sk, opt, opt->tot_len);
1511 dst_release(dst); 1513 dst_release(dst);
1514out:
1515 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1512 return NULL; 1516 return NULL;
1513} 1517}
1514 1518
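The label rework in tcp_v6_syn_recv_sock() exists because __inet_inherit_port() can now fail (it may need to allocate a bind-hash bucket for the child). Two details matter: the failure path releases the child with sock_put() but deliberately jumps to out rather than out_nonewsk, since opt and dst have already been taken over by newsk and must not be freed twice; and LISTENDROPS now sits under the final out label so it is counted exactly once on every failure path.
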
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index fc3c86a47452..4f3cec12aa85 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,28 +30,31 @@
30#include <net/protocol.h> 30#include <net/protocol.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32 32
33static struct xfrm6_tunnel *tunnel6_handlers; 33static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
34static struct xfrm6_tunnel *tunnel46_handlers; 34static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
35static DEFINE_MUTEX(tunnel6_mutex); 35static DEFINE_MUTEX(tunnel6_mutex);
36 36
37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) 37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
38{ 38{
39 struct xfrm6_tunnel **pprev; 39 struct xfrm6_tunnel __rcu **pprev;
40 struct xfrm6_tunnel *t;
40 int ret = -EEXIST; 41 int ret = -EEXIST;
41 int priority = handler->priority; 42 int priority = handler->priority;
42 43
43 mutex_lock(&tunnel6_mutex); 44 mutex_lock(&tunnel6_mutex);
44 45
45 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; 46 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
46 *pprev; pprev = &(*pprev)->next) { 47 (t = rcu_dereference_protected(*pprev,
47 if ((*pprev)->priority > priority) 48 lockdep_is_held(&tunnel6_mutex))) != NULL;
49 pprev = &t->next) {
50 if (t->priority > priority)
48 break; 51 break;
49 if ((*pprev)->priority == priority) 52 if (t->priority == priority)
50 goto err; 53 goto err;
51 } 54 }
52 55
53 handler->next = *pprev; 56 handler->next = *pprev;
54 *pprev = handler; 57 rcu_assign_pointer(*pprev, handler);
55 58
56 ret = 0; 59 ret = 0;
57 60
@@ -65,14 +68,17 @@ EXPORT_SYMBOL(xfrm6_tunnel_register);
65 68
66int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) 69int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
67{ 70{
68 struct xfrm6_tunnel **pprev; 71 struct xfrm6_tunnel __rcu **pprev;
72 struct xfrm6_tunnel *t;
69 int ret = -ENOENT; 73 int ret = -ENOENT;
70 74
71 mutex_lock(&tunnel6_mutex); 75 mutex_lock(&tunnel6_mutex);
72 76
73 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; 77 for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
74 *pprev; pprev = &(*pprev)->next) { 78 (t = rcu_dereference_protected(*pprev,
75 if (*pprev == handler) { 79 lockdep_is_held(&tunnel6_mutex))) != NULL;
80 pprev = &t->next) {
81 if (t == handler) {
76 *pprev = handler->next; 82 *pprev = handler->next;
77 ret = 0; 83 ret = 0;
78 break; 84 break;
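One rule the deregister hunk above leans on: after *pprev = handler->next the handler is invisible to new readers, but a reader already inside the list may still be dereferencing it, so its memory must survive a grace period. The handlers in this tree are statically allocated, which makes this mostly a module-unload concern; a hypothetical caller would look like this (the synchronize placement is an assumption, not shown in this hunk):

	xfrm6_tunnel_deregister(&my_handler, AF_INET6);	/* my_handler: hypothetical */
	synchronize_net();	/* wait out readers before the handler can vanish */
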
@@ -88,6 +94,11 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
88 94
89EXPORT_SYMBOL(xfrm6_tunnel_deregister); 95EXPORT_SYMBOL(xfrm6_tunnel_deregister);
90 96
97#define for_each_tunnel_rcu(head, handler) \
98 for (handler = rcu_dereference(head); \
99 handler != NULL; \
100 handler = rcu_dereference(handler->next)) \
101
91static int tunnel6_rcv(struct sk_buff *skb) 102static int tunnel6_rcv(struct sk_buff *skb)
92{ 103{
93 struct xfrm6_tunnel *handler; 104 struct xfrm6_tunnel *handler;
@@ -95,7 +106,7 @@ static int tunnel6_rcv(struct sk_buff *skb)
95 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 106 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
96 goto drop; 107 goto drop;
97 108
98 for (handler = tunnel6_handlers; handler; handler = handler->next) 109 for_each_tunnel_rcu(tunnel6_handlers, handler)
99 if (!handler->handler(skb)) 110 if (!handler->handler(skb))
100 return 0; 111 return 0;
101 112
@@ -113,7 +124,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
113 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 124 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
114 goto drop; 125 goto drop;
115 126
116 for (handler = tunnel46_handlers; handler; handler = handler->next) 127 for_each_tunnel_rcu(tunnel46_handlers, handler)
117 if (!handler->handler(skb)) 128 if (!handler->handler(skb))
118 return 0; 129 return 0;
119 130
@@ -129,7 +140,7 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
129{ 140{
130 struct xfrm6_tunnel *handler; 141 struct xfrm6_tunnel *handler;
131 142
132 for (handler = tunnel6_handlers; handler; handler = handler->next) 143 for_each_tunnel_rcu(tunnel6_handlers, handler)
133 if (!handler->err_handler(skb, opt, type, code, offset, info)) 144 if (!handler->err_handler(skb, opt, type, code, offset, info))
134 break; 145 break;
135} 146}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5acb3560ff15..91def93bec85 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -122,8 +122,8 @@ static void udp_v6_rehash(struct sock *sk)
122 122
123static inline int compute_score(struct sock *sk, struct net *net, 123static inline int compute_score(struct sock *sk, struct net *net,
124 unsigned short hnum, 124 unsigned short hnum,
125 struct in6_addr *saddr, __be16 sport, 125 const struct in6_addr *saddr, __be16 sport,
126 struct in6_addr *daddr, __be16 dport, 126 const struct in6_addr *daddr, __be16 dport,
127 int dif) 127 int dif)
128{ 128{
129 int score = -1; 129 int score = -1;
@@ -239,8 +239,8 @@ exact_match:
239} 239}
240 240
241static struct sock *__udp6_lib_lookup(struct net *net, 241static struct sock *__udp6_lib_lookup(struct net *net,
242 struct in6_addr *saddr, __be16 sport, 242 const struct in6_addr *saddr, __be16 sport,
243 struct in6_addr *daddr, __be16 dport, 243 const struct in6_addr *daddr, __be16 dport,
244 int dif, struct udp_table *udptable) 244 int dif, struct udp_table *udptable)
245{ 245{
246 struct sock *sk, *result; 246 struct sock *sk, *result;
@@ -320,6 +320,14 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
320 udptable); 320 udptable);
321} 321}
322 322
323struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
324 const struct in6_addr *daddr, __be16 dport, int dif)
325{
326 return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
327}
328EXPORT_SYMBOL_GPL(udp6_lib_lookup);
329
330
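udp6_lib_lookup() above exposes the receive-path socket lookup to modules. A hypothetical caller resolving the destination socket of an incoming datagram might look like this (all argument choices illustrative; assumes the skb's network and transport headers are set up):

	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct udphdr *uh = udp_hdr(skb);
	struct sock *sk;

	sk = udp6_lib_lookup(dev_net(skb->dev),
			     &ip6h->saddr, uh->source,
			     &ip6h->daddr, uh->dest,
			     inet6_iif(skb));
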
323/* 331/*
324 * This should be easy, if there is something there we 332 * This should be easy, if there is something there we
325 * return it, otherwise we block. 333 * return it, otherwise we block.
@@ -519,7 +527,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
519 } 527 }
520 } 528 }
521 529
522 if (sk->sk_filter) { 530 if (rcu_dereference_raw(sk->sk_filter)) {
523 if (udp_lib_checksum_complete(skb)) 531 if (udp_lib_checksum_complete(skb))
524 goto drop; 532 goto drop;
525 } 533 }
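The rcu_dereference_raw() above is not a locking change: sk->sk_filter is an __rcu pointer that this site merely tests for NULL and never dereferences, so the _raw accessor satisfies the sparse annotation without weakening any real protection. Equivalent shape:

	/* Pointer value only compared against NULL; the filter itself is
	 * never touched, so no rcu_read_lock() is needed for this test. */
	if (rcu_dereference_raw(sk->sk_filter) &&
	    udp_lib_checksum_complete(skb))
		goto drop;
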
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 6baeabbbca82..7e74023ea6e4 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); 199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
200 200
201 xfrm6_policy_afinfo.garbage_collect(net); 201 xfrm6_policy_afinfo.garbage_collect(net);
202 return (atomic_read(&ops->entries) > ops->gc_thresh * 2); 202 return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
203} 203}
204 204
205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) 205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = {
255 .ifdown = xfrm6_dst_ifdown, 255 .ifdown = xfrm6_dst_ifdown,
256 .local_out = __ip6_local_out, 256 .local_out = __ip6_local_out,
257 .gc_thresh = 1024, 257 .gc_thresh = 1024,
258 .entries = ATOMIC_INIT(0),
259}; 258};
260 259
261static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { 260static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -312,11 +311,13 @@ int __init xfrm6_init(void)
312 */ 311 */
313 gc_thresh = FIB6_TABLE_HASHSZ * 8; 312 gc_thresh = FIB6_TABLE_HASHSZ * 8;
314 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; 313 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
314 dst_entries_init(&xfrm6_dst_ops);
315 315
316 ret = xfrm6_policy_init(); 316 ret = xfrm6_policy_init();
317 if (ret) 317 if (ret) {
318 dst_entries_destroy(&xfrm6_dst_ops);
318 goto out; 319 goto out;
319 320 }
320 ret = xfrm6_state_init(); 321 ret = xfrm6_state_init();
321 if (ret) 322 if (ret)
322 goto out_policy; 323 goto out_policy;
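Worth flagging in the hunk above: unlike ip6_route_net_init(), xfrm6_init() discards the return value of dst_entries_init(), so a percpu-counter allocation failure would pass silently here. A stricter sketch of the same call site:

	/* Sketch; assumes dst_entries_init() returns 0 or -ENOMEM. */
	ret = dst_entries_init(&xfrm6_dst_ops);
	if (ret)
		goto out;
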
@@ -341,4 +342,5 @@ void xfrm6_fini(void)
341 //xfrm6_input_fini(); 342 //xfrm6_input_fini();
342 xfrm6_policy_fini(); 343 xfrm6_policy_fini();
343 xfrm6_state_fini(); 344 xfrm6_state_fini();
345 dst_entries_destroy(&xfrm6_dst_ops);
344} 346}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2ce3a8278f26..2969cad408de 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -199,7 +199,7 @@ static void x6spi_destroy_rcu(struct rcu_head *head)
199 container_of(head, struct xfrm6_tunnel_spi, rcu_head)); 199 container_of(head, struct xfrm6_tunnel_spi, rcu_head));
200} 200}
201 201
202void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) 202static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
203{ 203{
204 struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); 204 struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
205 struct xfrm6_tunnel_spi *x6spi; 205 struct xfrm6_tunnel_spi *x6spi;
@@ -223,8 +223,6 @@ void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
223 spin_unlock_bh(&xfrm6_tunnel_spi_lock); 223 spin_unlock_bh(&xfrm6_tunnel_spi_lock);
224} 224}
225 225
226EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
227
228static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) 226static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
229{ 227{
230 skb_push(skb, -skb_network_offset(skb)); 228 skb_push(skb, -skb_network_offset(skb));
@@ -317,13 +315,13 @@ static const struct xfrm_type xfrm6_tunnel_type = {
317 .output = xfrm6_tunnel_output, 315 .output = xfrm6_tunnel_output,
318}; 316};
319 317
320static struct xfrm6_tunnel xfrm6_tunnel_handler = { 318static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = {
321 .handler = xfrm6_tunnel_rcv, 319 .handler = xfrm6_tunnel_rcv,
322 .err_handler = xfrm6_tunnel_err, 320 .err_handler = xfrm6_tunnel_err,
323 .priority = 2, 321 .priority = 2,
324}; 322};
325 323
326static struct xfrm6_tunnel xfrm46_tunnel_handler = { 324static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = {
327 .handler = xfrm6_tunnel_rcv, 325 .handler = xfrm6_tunnel_rcv,
328 .err_handler = xfrm6_tunnel_err, 326 .err_handler = xfrm6_tunnel_err,
329 .priority = 2, 327 .priority = 2,
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
index e9ad0062fbb6..02549cb2c328 100644
--- a/net/ipx/Kconfig
+++ b/net/ipx/Kconfig
@@ -3,6 +3,7 @@
3# 3#
4config IPX 4config IPX
5 tristate "The IPX protocol" 5 tristate "The IPX protocol"
6 depends on BKL # should be fixable
6 select LLC 7 select LLC
7 ---help--- 8 ---help---
8 This is support for the Novell networking protocol, IPX, commonly 9 This is support for the Novell networking protocol, IPX, commonly
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index fd55b5135de5..7f097989cde2 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -573,9 +573,9 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
573 /* Requested object/attribute doesn't exist */ 573 /* Requested object/attribute doesn't exist */
574 if((self->errno == IAS_CLASS_UNKNOWN) || 574 if((self->errno == IAS_CLASS_UNKNOWN) ||
575 (self->errno == IAS_ATTRIB_UNKNOWN)) 575 (self->errno == IAS_ATTRIB_UNKNOWN))
576 return (-EADDRNOTAVAIL); 576 return -EADDRNOTAVAIL;
577 else 577 else
578 return (-EHOSTUNREACH); 578 return -EHOSTUNREACH;
579 } 579 }
580 580
581 /* Get the remote TSAP selector */ 581 /* Get the remote TSAP selector */
@@ -663,7 +663,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
663 __func__, name); 663 __func__, name);
664 self->daddr = DEV_ADDR_ANY; 664 self->daddr = DEV_ADDR_ANY;
665 kfree(discoveries); 665 kfree(discoveries);
666 return(-ENOTUNIQ); 666 return -ENOTUNIQ;
667 } 667 }
668 /* First time we found that one, save it ! */ 668 /* First time we found that one, save it ! */
669 daddr = self->daddr; 669 daddr = self->daddr;
@@ -677,7 +677,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
677 IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__); 677 IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__);
678 self->daddr = DEV_ADDR_ANY; 678 self->daddr = DEV_ADDR_ANY;
679 kfree(discoveries); 679 kfree(discoveries);
680 return(-EHOSTUNREACH); 680 return -EHOSTUNREACH;
681 break; 681 break;
682 } 682 }
683 } 683 }
@@ -689,7 +689,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
689 IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n", 689 IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n",
690 __func__, name); 690 __func__, name);
691 self->daddr = DEV_ADDR_ANY; 691 self->daddr = DEV_ADDR_ANY;
692 return(-EADDRNOTAVAIL); 692 return -EADDRNOTAVAIL;
693 } 693 }
694 694
695 /* Revert back to discovered device & service */ 695 /* Revert back to discovered device & service */
@@ -715,14 +715,11 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
715 struct sockaddr_irda saddr; 715 struct sockaddr_irda saddr;
716 struct sock *sk = sock->sk; 716 struct sock *sk = sock->sk;
717 struct irda_sock *self = irda_sk(sk); 717 struct irda_sock *self = irda_sk(sk);
718 int err;
719 718
720 lock_kernel();
721 memset(&saddr, 0, sizeof(saddr)); 719 memset(&saddr, 0, sizeof(saddr));
722 if (peer) { 720 if (peer) {
723 err = -ENOTCONN;
724 if (sk->sk_state != TCP_ESTABLISHED) 721 if (sk->sk_state != TCP_ESTABLISHED)
725 goto out; 722 return -ENOTCONN;
726 723
727 saddr.sir_family = AF_IRDA; 724 saddr.sir_family = AF_IRDA;
728 saddr.sir_lsap_sel = self->dtsap_sel; 725 saddr.sir_lsap_sel = self->dtsap_sel;
@@ -739,10 +736,8 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
739 /* uaddr_len come to us uninitialised */ 736 /* uaddr_len come to us uninitialised */
740 *uaddr_len = sizeof (struct sockaddr_irda); 737 *uaddr_len = sizeof (struct sockaddr_irda);
741 memcpy(uaddr, &saddr, *uaddr_len); 738 memcpy(uaddr, &saddr, *uaddr_len);
742 err = 0; 739
743out: 740 return 0;
744 unlock_kernel();
745 return err;
746} 741}
747 742
748/* 743/*
@@ -758,7 +753,8 @@ static int irda_listen(struct socket *sock, int backlog)
758 753
759 IRDA_DEBUG(2, "%s()\n", __func__); 754 IRDA_DEBUG(2, "%s()\n", __func__);
760 755
761 lock_kernel(); 756 lock_sock(sk);
757
762 if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && 758 if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
763 (sk->sk_type != SOCK_DGRAM)) 759 (sk->sk_type != SOCK_DGRAM))
764 goto out; 760 goto out;
@@ -770,7 +766,7 @@ static int irda_listen(struct socket *sock, int backlog)
770 err = 0; 766 err = 0;
771 } 767 }
772out: 768out:
773 unlock_kernel(); 769 release_sock(sk);
774 770
775 return err; 771 return err;
776} 772}
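From here to the end of the file the af_irda changes are one mechanical conversion, applied hunk after hunk: every lock_kernel()/unlock_kernel() pair guarding per-socket state becomes lock_sock()/release_sock() on that socket, and paths that only call helpers which are safe without it (skb_recv_datagram(), datagram_poll()) drop the outer lock entirely. The recurring shape:

	lock_sock(sk);		/* was: lock_kernel() */
	/* ... inspect or modify socket state ... */
	release_sock(sk);	/* was: unlock_kernel() */
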
@@ -793,7 +789,7 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
793 if (addr_len != sizeof(struct sockaddr_irda)) 789 if (addr_len != sizeof(struct sockaddr_irda))
794 return -EINVAL; 790 return -EINVAL;
795 791
796 lock_kernel(); 792 lock_sock(sk);
797#ifdef CONFIG_IRDA_ULTRA 793#ifdef CONFIG_IRDA_ULTRA
798 /* Special care for Ultra sockets */ 794 /* Special care for Ultra sockets */
799 if ((sk->sk_type == SOCK_DGRAM) && 795 if ((sk->sk_type == SOCK_DGRAM) &&
@@ -836,7 +832,7 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
836 832
837 err = 0; 833 err = 0;
838out: 834out:
839 unlock_kernel(); 835 release_sock(sk);
840 return err; 836 return err;
841} 837}
842 838
@@ -856,12 +852,13 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
856 852
857 IRDA_DEBUG(2, "%s()\n", __func__); 853 IRDA_DEBUG(2, "%s()\n", __func__);
858 854
859 lock_kernel();
860 err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); 855 err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
861 if (err) 856 if (err)
862 goto out; 857 return err;
863 858
864 err = -EINVAL; 859 err = -EINVAL;
860
861 lock_sock(sk);
865 if (sock->state != SS_UNCONNECTED) 862 if (sock->state != SS_UNCONNECTED)
866 goto out; 863 goto out;
867 864
@@ -947,7 +944,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
947 irda_connect_response(new); 944 irda_connect_response(new);
948 err = 0; 945 err = 0;
949out: 946out:
950 unlock_kernel(); 947 release_sock(sk);
951 return err; 948 return err;
952} 949}
953 950
@@ -981,7 +978,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
981 978
982 IRDA_DEBUG(2, "%s(%p)\n", __func__, self); 979 IRDA_DEBUG(2, "%s(%p)\n", __func__, self);
983 980
984 lock_kernel(); 981 lock_sock(sk);
985 /* Don't allow connect for Ultra sockets */ 982 /* Don't allow connect for Ultra sockets */
986 err = -ESOCKTNOSUPPORT; 983 err = -ESOCKTNOSUPPORT;
987 if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA)) 984 if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA))
@@ -1072,6 +1069,8 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
1072 1069
1073 if (sk->sk_state != TCP_ESTABLISHED) { 1070 if (sk->sk_state != TCP_ESTABLISHED) {
1074 sock->state = SS_UNCONNECTED; 1071 sock->state = SS_UNCONNECTED;
1072 if (sk->sk_prot->disconnect(sk, flags))
1073 sock->state = SS_DISCONNECTING;
1075 err = sock_error(sk); 1074 err = sock_error(sk);
1076 if (!err) 1075 if (!err)
1077 err = -ECONNRESET; 1076 err = -ECONNRESET;
@@ -1084,7 +1083,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
1084 self->saddr = irttp_get_saddr(self->tsap); 1083 self->saddr = irttp_get_saddr(self->tsap);
1085 err = 0; 1084 err = 0;
1086out: 1085out:
1087 unlock_kernel(); 1086 release_sock(sk);
1088 return err; 1087 return err;
1089} 1088}
1090 1089
@@ -1231,7 +1230,6 @@ static int irda_release(struct socket *sock)
1231 if (sk == NULL) 1230 if (sk == NULL)
1232 return 0; 1231 return 0;
1233 1232
1234 lock_kernel();
1235 lock_sock(sk); 1233 lock_sock(sk);
1236 sk->sk_state = TCP_CLOSE; 1234 sk->sk_state = TCP_CLOSE;
1237 sk->sk_shutdown |= SEND_SHUTDOWN; 1235 sk->sk_shutdown |= SEND_SHUTDOWN;
@@ -1250,7 +1248,6 @@ static int irda_release(struct socket *sock)
1250 /* Destroy networking socket if we are the last reference on it, 1248 /* Destroy networking socket if we are the last reference on it,
1251 * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */ 1249 * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */
1252 sock_put(sk); 1250 sock_put(sk);
1253 unlock_kernel();
1254 1251
1255 /* Notes on socket locking and deallocation... - Jean II 1252 /* Notes on socket locking and deallocation... - Jean II
1256 * In theory we should put pairs of sock_hold() / sock_put() to 1253 * In theory we should put pairs of sock_hold() / sock_put() to
@@ -1298,7 +1295,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1298 1295
1299 IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len); 1296 IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len);
1300 1297
1301 lock_kernel();
1302 /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ 1298 /* Note : socket.c set MSG_EOR on SEQPACKET sockets */
1303 if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | 1299 if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT |
1304 MSG_NOSIGNAL)) { 1300 MSG_NOSIGNAL)) {
@@ -1306,6 +1302,8 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1306 goto out; 1302 goto out;
1307 } 1303 }
1308 1304
1305 lock_sock(sk);
1306
1309 if (sk->sk_shutdown & SEND_SHUTDOWN) 1307 if (sk->sk_shutdown & SEND_SHUTDOWN)
1310 goto out_err; 1308 goto out_err;
1311 1309
@@ -1361,14 +1359,14 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1361 goto out_err; 1359 goto out_err;
1362 } 1360 }
1363 1361
1364 unlock_kernel(); 1362 release_sock(sk);
1365 /* Tell client how much data we actually sent */ 1363 /* Tell client how much data we actually sent */
1366 return len; 1364 return len;
1367 1365
1368out_err: 1366out_err:
1369 err = sk_stream_error(sk, msg->msg_flags, err); 1367 err = sk_stream_error(sk, msg->msg_flags, err);
1370out: 1368out:
1371 unlock_kernel(); 1369 release_sock(sk);
1372 return err; 1370 return err;
1373 1371
1374} 1372}
@@ -1390,14 +1388,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
1390 1388
1391 IRDA_DEBUG(4, "%s()\n", __func__); 1389 IRDA_DEBUG(4, "%s()\n", __func__);
1392 1390
1393 lock_kernel();
1394 if ((err = sock_error(sk)) < 0)
1395 goto out;
1396
1397 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 1391 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1398 flags & MSG_DONTWAIT, &err); 1392 flags & MSG_DONTWAIT, &err);
1399 if (!skb) 1393 if (!skb)
1400 goto out; 1394 return err;
1401 1395
1402 skb_reset_transport_header(skb); 1396 skb_reset_transport_header(skb);
1403 copied = skb->len; 1397 copied = skb->len;
@@ -1425,12 +1419,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
1425 irttp_flow_request(self->tsap, FLOW_START); 1419 irttp_flow_request(self->tsap, FLOW_START);
1426 } 1420 }
1427 } 1421 }
1428 unlock_kernel();
1429 return copied;
1430 1422
1431out: 1423 return copied;
1432 unlock_kernel();
1433 return err;
1434} 1424}
1435 1425
1436/* 1426/*
@@ -1448,17 +1438,15 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1448 1438
1449 IRDA_DEBUG(3, "%s()\n", __func__); 1439 IRDA_DEBUG(3, "%s()\n", __func__);
1450 1440
1451 lock_kernel();
1452 if ((err = sock_error(sk)) < 0) 1441 if ((err = sock_error(sk)) < 0)
1453 goto out; 1442 return err;
1454 1443
1455 err = -EINVAL;
1456 if (sock->flags & __SO_ACCEPTCON) 1444 if (sock->flags & __SO_ACCEPTCON)
1457 goto out; 1445 return -EINVAL;
1458 1446
1459 err =-EOPNOTSUPP; 1447 err =-EOPNOTSUPP;
1460 if (flags & MSG_OOB) 1448 if (flags & MSG_OOB)
1461 goto out; 1449 return -EOPNOTSUPP;
1462 1450
1463 err = 0; 1451 err = 0;
1464 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 1452 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
@@ -1500,7 +1488,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1500 finish_wait(sk_sleep(sk), &wait); 1488 finish_wait(sk_sleep(sk), &wait);
1501 1489
1502 if (err) 1490 if (err)
1503 goto out; 1491 return err;
1504 if (sk->sk_shutdown & RCV_SHUTDOWN) 1492 if (sk->sk_shutdown & RCV_SHUTDOWN)
1505 break; 1493 break;
1506 1494
@@ -1553,9 +1541,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1553 } 1541 }
1554 } 1542 }
1555 1543
1556out: 1544 return copied;
1557 unlock_kernel();
1558 return err ? : copied;
1559} 1545}
1560 1546
1561/* 1547/*
@@ -1573,13 +1559,12 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
1573 struct sk_buff *skb; 1559 struct sk_buff *skb;
1574 int err; 1560 int err;
1575 1561
1576 lock_kernel();
1577
1578 IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len); 1562 IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len);
1579 1563
1580 err = -EINVAL;
1581 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) 1564 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
1582 goto out; 1565 return -EINVAL;
1566
1567 lock_sock(sk);
1583 1568
1584 if (sk->sk_shutdown & SEND_SHUTDOWN) { 1569 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1585 send_sig(SIGPIPE, current, 0); 1570 send_sig(SIGPIPE, current, 0);
@@ -1630,10 +1615,12 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
1630 IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err); 1615 IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err);
1631 goto out; 1616 goto out;
1632 } 1617 }
1633 unlock_kernel(); 1618
1619 release_sock(sk);
1634 return len; 1620 return len;
1621
1635out: 1622out:
1636 unlock_kernel(); 1623 release_sock(sk);
1637 return err; 1624 return err;
1638} 1625}
1639 1626
@@ -1656,10 +1643,11 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
1656 1643
1657 IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len); 1644 IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len);
1658 1645
1659 lock_kernel();
1660 err = -EINVAL; 1646 err = -EINVAL;
1661 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) 1647 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
1662 goto out; 1648 return -EINVAL;
1649
1650 lock_sock(sk);
1663 1651
1664 err = -EPIPE; 1652 err = -EPIPE;
1665 if (sk->sk_shutdown & SEND_SHUTDOWN) { 1653 if (sk->sk_shutdown & SEND_SHUTDOWN) {
@@ -1732,7 +1720,7 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
1732 if (err) 1720 if (err)
1733 IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err); 1721 IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err);
1734out: 1722out:
1735 unlock_kernel(); 1723 release_sock(sk);
1736 return err ? : len; 1724 return err ? : len;
1737} 1725}
1738#endif /* CONFIG_IRDA_ULTRA */ 1726#endif /* CONFIG_IRDA_ULTRA */
@@ -1747,7 +1735,7 @@ static int irda_shutdown(struct socket *sock, int how)
1747 1735
1748 IRDA_DEBUG(1, "%s(%p)\n", __func__, self); 1736 IRDA_DEBUG(1, "%s(%p)\n", __func__, self);
1749 1737
1750 lock_kernel(); 1738 lock_sock(sk);
1751 1739
1752 sk->sk_state = TCP_CLOSE; 1740 sk->sk_state = TCP_CLOSE;
1753 sk->sk_shutdown |= SEND_SHUTDOWN; 1741 sk->sk_shutdown |= SEND_SHUTDOWN;
@@ -1769,7 +1757,7 @@ static int irda_shutdown(struct socket *sock, int how)
1769 self->daddr = DEV_ADDR_ANY; /* Until we get re-connected */ 1757 self->daddr = DEV_ADDR_ANY; /* Until we get re-connected */
1770 self->saddr = 0x0; /* so IrLMP assign us any link */ 1758 self->saddr = 0x0; /* so IrLMP assign us any link */
1771 1759
1772 unlock_kernel(); 1760 release_sock(sk);
1773 1761
1774 return 0; 1762 return 0;
1775} 1763}
@@ -1786,7 +1774,6 @@ static unsigned int irda_poll(struct file * file, struct socket *sock,
1786 1774
1787 IRDA_DEBUG(4, "%s()\n", __func__); 1775 IRDA_DEBUG(4, "%s()\n", __func__);
1788 1776
1789 lock_kernel();
1790 poll_wait(file, sk_sleep(sk), wait); 1777 poll_wait(file, sk_sleep(sk), wait);
1791 mask = 0; 1778 mask = 0;
1792 1779
@@ -1834,20 +1821,8 @@ static unsigned int irda_poll(struct file * file, struct socket *sock,
1834 default: 1821 default:
1835 break; 1822 break;
1836 } 1823 }
1837 unlock_kernel();
1838 return mask;
1839}
1840 1824
1841static unsigned int irda_datagram_poll(struct file *file, struct socket *sock, 1825 return mask;
1842 poll_table *wait)
1843{
1844 int err;
1845
1846 lock_kernel();
1847 err = datagram_poll(file, sock, wait);
1848 unlock_kernel();
1849
1850 return err;
1851} 1826}
1852 1827
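irda_datagram_poll() disappears above because it was nothing but a BKL wrapper around the generic helper; with the BKL gone, the SOCK_DGRAM proto_ops are presumably pointed straight at datagram_poll() elsewhere in this patch, i.e.:

	.poll = datagram_poll,	/* assumed ops-table change, not shown here */
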
1853/* 1828/*
@@ -1860,7 +1835,6 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1860 1835
1861 IRDA_DEBUG(4, "%s(), cmd=%#x\n", __func__, cmd); 1836 IRDA_DEBUG(4, "%s(), cmd=%#x\n", __func__, cmd);
1862 1837
1863 lock_kernel();
1864 err = -EINVAL; 1838 err = -EINVAL;
1865 switch (cmd) { 1839 switch (cmd) {
1866 case TIOCOUTQ: { 1840 case TIOCOUTQ: {
@@ -1903,7 +1877,6 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1903 IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __func__); 1877 IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __func__);
1904 err = -ENOIOCTLCMD; 1878 err = -ENOIOCTLCMD;
1905 } 1879 }
1906 unlock_kernel();
1907 1880
1908 return err; 1881 return err;
1909} 1882}
@@ -1927,7 +1900,7 @@ static int irda_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
1927 * Set some options for the socket 1900 * Set some options for the socket
1928 * 1901 *
1929 */ 1902 */
1930static int __irda_setsockopt(struct socket *sock, int level, int optname, 1903static int irda_setsockopt(struct socket *sock, int level, int optname,
1931 char __user *optval, unsigned int optlen) 1904 char __user *optval, unsigned int optlen)
1932{ 1905{
1933 struct sock *sk = sock->sk; 1906 struct sock *sk = sock->sk;
@@ -1935,13 +1908,15 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
1935 struct irda_ias_set *ias_opt; 1908 struct irda_ias_set *ias_opt;
1936 struct ias_object *ias_obj; 1909 struct ias_object *ias_obj;
1937 struct ias_attrib * ias_attr; /* Attribute in IAS object */ 1910 struct ias_attrib * ias_attr; /* Attribute in IAS object */
1938 int opt, free_ias = 0; 1911 int opt, free_ias = 0, err = 0;
1939 1912
1940 IRDA_DEBUG(2, "%s(%p)\n", __func__, self); 1913 IRDA_DEBUG(2, "%s(%p)\n", __func__, self);
1941 1914
1942 if (level != SOL_IRLMP) 1915 if (level != SOL_IRLMP)
1943 return -ENOPROTOOPT; 1916 return -ENOPROTOOPT;
1944 1917
1918 lock_sock(sk);
1919
1945 switch (optname) { 1920 switch (optname) {
1946 case IRLMP_IAS_SET: 1921 case IRLMP_IAS_SET:
1947 /* The user want to add an attribute to an existing IAS object 1922 /* The user want to add an attribute to an existing IAS object
@@ -1951,17 +1926,22 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
1951 * create the right attribute... 1926 * create the right attribute...
1952 */ 1927 */
1953 1928
1954 if (optlen != sizeof(struct irda_ias_set)) 1929 if (optlen != sizeof(struct irda_ias_set)) {
1955 return -EINVAL; 1930 err = -EINVAL;
1931 goto out;
1932 }
1956 1933
1957 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC); 1934 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
1958 if (ias_opt == NULL) 1935 if (ias_opt == NULL) {
1959 return -ENOMEM; 1936 err = -ENOMEM;
1937 goto out;
1938 }
1960 1939
1961 /* Copy query to the driver. */ 1940 /* Copy query to the driver. */
1962 if (copy_from_user(ias_opt, optval, optlen)) { 1941 if (copy_from_user(ias_opt, optval, optlen)) {
1963 kfree(ias_opt); 1942 kfree(ias_opt);
1964 return -EFAULT; 1943 err = -EFAULT;
1944 goto out;
1965 } 1945 }
1966 1946
1967 /* Find the object we target. 1947 /* Find the object we target.
@@ -1971,7 +1951,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
1971 if(ias_opt->irda_class_name[0] == '\0') { 1951 if(ias_opt->irda_class_name[0] == '\0') {
1972 if(self->ias_obj == NULL) { 1952 if(self->ias_obj == NULL) {
1973 kfree(ias_opt); 1953 kfree(ias_opt);
1974 return -EINVAL; 1954 err = -EINVAL;
1955 goto out;
1975 } 1956 }
1976 ias_obj = self->ias_obj; 1957 ias_obj = self->ias_obj;
1977 } else 1958 } else
@@ -1983,7 +1964,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
1983 if((!capable(CAP_NET_ADMIN)) && 1964 if((!capable(CAP_NET_ADMIN)) &&
1984 ((ias_obj == NULL) || (ias_obj != self->ias_obj))) { 1965 ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
1985 kfree(ias_opt); 1966 kfree(ias_opt);
1986 return -EPERM; 1967 err = -EPERM;
1968 goto out;
1987 } 1969 }
1988 1970
1989 /* If the object doesn't exist, create it */ 1971 /* If the object doesn't exist, create it */
@@ -1993,7 +1975,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
1993 jiffies); 1975 jiffies);
1994 if (ias_obj == NULL) { 1976 if (ias_obj == NULL) {
1995 kfree(ias_opt); 1977 kfree(ias_opt);
1996 return -ENOMEM; 1978 err = -ENOMEM;
1979 goto out;
1997 } 1980 }
1998 free_ias = 1; 1981 free_ias = 1;
1999 } 1982 }
@@ -2005,7 +1988,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2005 kfree(ias_obj->name); 1988 kfree(ias_obj->name);
2006 kfree(ias_obj); 1989 kfree(ias_obj);
2007 } 1990 }
2008 return -EINVAL; 1991 err = -EINVAL;
1992 goto out;
2009 } 1993 }
2010 1994
2011 /* Look at the type */ 1995 /* Look at the type */
@@ -2028,7 +2012,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2028 kfree(ias_obj); 2012 kfree(ias_obj);
2029 } 2013 }
2030 2014
2031 return -EINVAL; 2015 err = -EINVAL;
2016 goto out;
2032 } 2017 }
2033 /* Add an octet sequence attribute */ 2018 /* Add an octet sequence attribute */
2034 irias_add_octseq_attrib( 2019 irias_add_octseq_attrib(
@@ -2060,7 +2045,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2060 kfree(ias_obj->name); 2045 kfree(ias_obj->name);
2061 kfree(ias_obj); 2046 kfree(ias_obj);
2062 } 2047 }
2063 return -EINVAL; 2048 err = -EINVAL;
2049 goto out;
2064 } 2050 }
2065 irias_insert_object(ias_obj); 2051 irias_insert_object(ias_obj);
2066 kfree(ias_opt); 2052 kfree(ias_opt);
@@ -2071,17 +2057,22 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2071 * object is not owned by the kernel and delete it. 2057 * object is not owned by the kernel and delete it.
2072 */ 2058 */
2073 2059
2074 if (optlen != sizeof(struct irda_ias_set)) 2060 if (optlen != sizeof(struct irda_ias_set)) {
2075 return -EINVAL; 2061 err = -EINVAL;
2062 goto out;
2063 }
2076 2064
2077 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC); 2065 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
2078 if (ias_opt == NULL) 2066 if (ias_opt == NULL) {
2079 return -ENOMEM; 2067 err = -ENOMEM;
2068 goto out;
2069 }
2080 2070
2081 /* Copy query to the driver. */ 2071 /* Copy query to the driver. */
2082 if (copy_from_user(ias_opt, optval, optlen)) { 2072 if (copy_from_user(ias_opt, optval, optlen)) {
2083 kfree(ias_opt); 2073 kfree(ias_opt);
2084 return -EFAULT; 2074 err = -EFAULT;
2075 goto out;
2085 } 2076 }
2086 2077
2087 /* Find the object we target. 2078 /* Find the object we target.
@@ -2094,7 +2085,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2094 ias_obj = irias_find_object(ias_opt->irda_class_name); 2085 ias_obj = irias_find_object(ias_opt->irda_class_name);
2095 if(ias_obj == (struct ias_object *) NULL) { 2086 if(ias_obj == (struct ias_object *) NULL) {
2096 kfree(ias_opt); 2087 kfree(ias_opt);
2097 return -EINVAL; 2088 err = -EINVAL;
2089 goto out;
2098 } 2090 }
2099 2091
2100 /* Only ROOT can mess with the global IAS database. 2092 /* Only ROOT can mess with the global IAS database.
@@ -2103,7 +2095,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2103 if((!capable(CAP_NET_ADMIN)) && 2095 if((!capable(CAP_NET_ADMIN)) &&
2104 ((ias_obj == NULL) || (ias_obj != self->ias_obj))) { 2096 ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
2105 kfree(ias_opt); 2097 kfree(ias_opt);
2106 return -EPERM; 2098 err = -EPERM;
2099 goto out;
2107 } 2100 }
2108 2101
2109 /* Find the attribute (in the object) we target */ 2102 /* Find the attribute (in the object) we target */
@@ -2111,14 +2104,16 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2111 ias_opt->irda_attrib_name); 2104 ias_opt->irda_attrib_name);
2112 if(ias_attr == (struct ias_attrib *) NULL) { 2105 if(ias_attr == (struct ias_attrib *) NULL) {
2113 kfree(ias_opt); 2106 kfree(ias_opt);
2114 return -EINVAL; 2107 err = -EINVAL;
2108 goto out;
2115 } 2109 }
2116 2110
2117 /* Check is the user space own the object */ 2111 /* Check is the user space own the object */
2118 if(ias_attr->value->owner != IAS_USER_ATTR) { 2112 if(ias_attr->value->owner != IAS_USER_ATTR) {
2119 IRDA_DEBUG(1, "%s(), attempting to delete a kernel attribute\n", __func__); 2113 IRDA_DEBUG(1, "%s(), attempting to delete a kernel attribute\n", __func__);
2120 kfree(ias_opt); 2114 kfree(ias_opt);
2121 return -EPERM; 2115 err = -EPERM;
2116 goto out;
2122 } 2117 }
2123 2118
2124 /* Remove the attribute (and maybe the object) */ 2119 /* Remove the attribute (and maybe the object) */
@@ -2126,11 +2121,15 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2126 kfree(ias_opt); 2121 kfree(ias_opt);
2127 break; 2122 break;
2128 case IRLMP_MAX_SDU_SIZE: 2123 case IRLMP_MAX_SDU_SIZE:
2129 if (optlen < sizeof(int)) 2124 if (optlen < sizeof(int)) {
2130 return -EINVAL; 2125 err = -EINVAL;
2126 goto out;
2127 }
2131 2128
2132 if (get_user(opt, (int __user *)optval)) 2129 if (get_user(opt, (int __user *)optval)) {
2133 return -EFAULT; 2130 err = -EFAULT;
2131 goto out;
2132 }
2134 2133
2135 /* Only possible for a seqpacket service (TTP with SAR) */ 2134 /* Only possible for a seqpacket service (TTP with SAR) */
2136 if (sk->sk_type != SOCK_SEQPACKET) { 2135 if (sk->sk_type != SOCK_SEQPACKET) {
@@ -2140,16 +2139,21 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2140 } else { 2139 } else {
2141 IRDA_WARNING("%s: not allowed to set MAXSDUSIZE for this socket type!\n", 2140 IRDA_WARNING("%s: not allowed to set MAXSDUSIZE for this socket type!\n",
2142 __func__); 2141 __func__);
2143 return -ENOPROTOOPT; 2142 err = -ENOPROTOOPT;
2143 goto out;
2144 } 2144 }
2145 break; 2145 break;
2146 case IRLMP_HINTS_SET: 2146 case IRLMP_HINTS_SET:
2147 if (optlen < sizeof(int)) 2147 if (optlen < sizeof(int)) {
2148 return -EINVAL; 2148 err = -EINVAL;
2149 goto out;
2150 }
2149 2151
2150 /* The input is really a (__u8 hints[2]), easier as an int */ 2152 /* The input is really a (__u8 hints[2]), easier as an int */
2151 if (get_user(opt, (int __user *)optval)) 2153 if (get_user(opt, (int __user *)optval)) {
2152 return -EFAULT; 2154 err = -EFAULT;
2155 goto out;
2156 }
2153 2157
2154 /* Unregister any old registration */ 2158 /* Unregister any old registration */
2155 if (self->skey) 2159 if (self->skey)
@@ -2163,12 +2167,16 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2163 * making a discovery (nodes which don't match any hint 2167 * making a discovery (nodes which don't match any hint
2164 * bit in the mask are not reported). 2168 * bit in the mask are not reported).
2165 */ 2169 */
2166 if (optlen < sizeof(int)) 2170 if (optlen < sizeof(int)) {
2167 return -EINVAL; 2171 err = -EINVAL;
2172 goto out;
2173 }
2168 2174
2169 /* The input is really a (__u8 hints[2]), easier as an int */ 2175 /* The input is really a (__u8 hints[2]), easier as an int */
2170 if (get_user(opt, (int __user *)optval)) 2176 if (get_user(opt, (int __user *)optval)) {
2171 return -EFAULT; 2177 err = -EFAULT;
2178 goto out;
2179 }
2172 2180
2173 /* Set the new hint mask */ 2181 /* Set the new hint mask */
2174 self->mask.word = (__u16) opt; 2182 self->mask.word = (__u16) opt;
@@ -2180,19 +2188,12 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
2180 2188
2181 break; 2189 break;
2182 default: 2190 default:
2183 return -ENOPROTOOPT; 2191 err = -ENOPROTOOPT;
2192 break;
2184 } 2193 }
2185 return 0;
2186}
2187 2194
2188static int irda_setsockopt(struct socket *sock, int level, int optname, 2195out:
2189 char __user *optval, unsigned int optlen) 2196 release_sock(sk);
2190{
2191 int err;
2192
2193 lock_kernel();
2194 err = __irda_setsockopt(sock, level, optname, optval, optlen);
2195 unlock_kernel();
2196 2197
2197 return err; 2198 return err;
2198} 2199}
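
The setsockopt conversion above follows one pattern throughout: the lock_kernel()/unlock_kernel() wrapper goes away, the body takes the per-socket lock instead, and every early return becomes err = ...; goto out; so a single exit path always releases it. A minimal sketch of that shape, with illustrative names rather than the real af_irda handlers:

#include <linux/errno.h>
#include <net/sock.h>

static int example_setsockopt(struct sock *sk, int optname,
			      char __user *optval, unsigned int optlen)
{
	int err = 0;

	lock_sock(sk);		/* serialize against other calls on this socket */

	switch (optname) {
	case 1:
		if (optlen < sizeof(int)) {
			err = -EINVAL;
			goto out;	/* must not return with the lock held */
		}
		/* ... option-specific work ... */
		break;
	default:
		err = -ENOPROTOOPT;
	}
out:
	release_sock(sk);	/* single exit: every path unlocks */
	return err;
}

Unlike the BKL, lock_sock() only serializes one socket, so unrelated sockets no longer contend on a global lock.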
@@ -2249,7 +2250,7 @@ static int irda_extract_ias_value(struct irda_ias_set *ias_opt,
2249/* 2250/*
2250 * Function irda_getsockopt (sock, level, optname, optval, optlen) 2251 * Function irda_getsockopt (sock, level, optname, optval, optlen)
2251 */ 2252 */
2252static int __irda_getsockopt(struct socket *sock, int level, int optname, 2253static int irda_getsockopt(struct socket *sock, int level, int optname,
2253 char __user *optval, int __user *optlen) 2254 char __user *optval, int __user *optlen)
2254{ 2255{
2255 struct sock *sk = sock->sk; 2256 struct sock *sk = sock->sk;
@@ -2262,7 +2263,7 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
2262 int daddr = DEV_ADDR_ANY; /* Dest address for IAS queries */ 2263 int daddr = DEV_ADDR_ANY; /* Dest address for IAS queries */
2263 int val = 0; 2264 int val = 0;
2264 int len = 0; 2265 int len = 0;
2265 int err; 2266 int err = 0;
2266 int offset, total; 2267 int offset, total;
2267 2268
2268 IRDA_DEBUG(2, "%s(%p)\n", __func__, self); 2269 IRDA_DEBUG(2, "%s(%p)\n", __func__, self);
@@ -2276,15 +2277,18 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
2276 if(len < 0) 2277 if(len < 0)
2277 return -EINVAL; 2278 return -EINVAL;
2278 2279
2280 lock_sock(sk);
2281
2279 switch (optname) { 2282 switch (optname) {
2280 case IRLMP_ENUMDEVICES: 2283 case IRLMP_ENUMDEVICES:
2281 /* Ask lmp for the current discovery log */ 2284 /* Ask lmp for the current discovery log */
2282 discoveries = irlmp_get_discoveries(&list.len, self->mask.word, 2285 discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
2283 self->nslots); 2286 self->nslots);
2284 /* Check if the we got some results */ 2287 /* Check if the we got some results */
2285 if (discoveries == NULL) 2288 if (discoveries == NULL) {
2286 return -EAGAIN; /* Didn't find any devices */ 2289 err = -EAGAIN;
2287 err = 0; 2290 goto out; /* Didn't find any devices */
2291 }
2288 2292
2289 /* Write total list length back to client */ 2293 /* Write total list length back to client */
2290 if (copy_to_user(optval, &list, 2294 if (copy_to_user(optval, &list,
@@ -2297,8 +2301,7 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
2297 sizeof(struct irda_device_info); 2301 sizeof(struct irda_device_info);
2298 2302
2299 /* Copy the list itself - watch for overflow */ 2303 /* Copy the list itself - watch for overflow */
2300 if(list.len > 2048) 2304 if (list.len > 2048) {
2301 {
2302 err = -EINVAL; 2305 err = -EINVAL;
2303 goto bed; 2306 goto bed;
2304 } 2307 }
@@ -2314,17 +2317,20 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
2314bed: 2317bed:
2315 /* Free up our buffer */ 2318 /* Free up our buffer */
2316 kfree(discoveries); 2319 kfree(discoveries);
2317 if (err)
2318 return err;
2319 break; 2320 break;
2320 case IRLMP_MAX_SDU_SIZE: 2321 case IRLMP_MAX_SDU_SIZE:
2321 val = self->max_data_size; 2322 val = self->max_data_size;
2322 len = sizeof(int); 2323 len = sizeof(int);
2323 if (put_user(len, optlen)) 2324 if (put_user(len, optlen)) {
2324 return -EFAULT; 2325 err = -EFAULT;
2326 goto out;
2327 }
2328
2329 if (copy_to_user(optval, &val, len)) {
2330 err = -EFAULT;
2331 goto out;
2332 }
2325 2333
2326 if (copy_to_user(optval, &val, len))
2327 return -EFAULT;
2328 break; 2334 break;
2329 case IRLMP_IAS_GET: 2335 case IRLMP_IAS_GET:
2330 /* The user want an object from our local IAS database. 2336 /* The user want an object from our local IAS database.
@@ -2332,17 +2338,22 @@ bed:
2332 * that we found */ 2338 * that we found */
2333 2339
2334 /* Check that the user has allocated the right space for us */ 2340 /* Check that the user has allocated the right space for us */
2335 if (len != sizeof(struct irda_ias_set)) 2341 if (len != sizeof(struct irda_ias_set)) {
2336 return -EINVAL; 2342 err = -EINVAL;
2343 goto out;
2344 }
2337 2345
2338 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC); 2346 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
2339 if (ias_opt == NULL) 2347 if (ias_opt == NULL) {
2340 return -ENOMEM; 2348 err = -ENOMEM;
2349 goto out;
2350 }
2341 2351
2342 /* Copy query to the driver. */ 2352 /* Copy query to the driver. */
2343 if (copy_from_user(ias_opt, optval, len)) { 2353 if (copy_from_user(ias_opt, optval, len)) {
2344 kfree(ias_opt); 2354 kfree(ias_opt);
2345 return -EFAULT; 2355 err = -EFAULT;
2356 goto out;
2346 } 2357 }
2347 2358
2348 /* Find the object we target. 2359 /* Find the object we target.
@@ -2355,7 +2366,8 @@ bed:
2355 ias_obj = irias_find_object(ias_opt->irda_class_name); 2366 ias_obj = irias_find_object(ias_opt->irda_class_name);
2356 if(ias_obj == (struct ias_object *) NULL) { 2367 if(ias_obj == (struct ias_object *) NULL) {
2357 kfree(ias_opt); 2368 kfree(ias_opt);
2358 return -EINVAL; 2369 err = -EINVAL;
2370 goto out;
2359 } 2371 }
2360 2372
2361 /* Find the attribute (in the object) we target */ 2373 /* Find the attribute (in the object) we target */
@@ -2363,21 +2375,23 @@ bed:
2363 ias_opt->irda_attrib_name); 2375 ias_opt->irda_attrib_name);
2364 if(ias_attr == (struct ias_attrib *) NULL) { 2376 if(ias_attr == (struct ias_attrib *) NULL) {
2365 kfree(ias_opt); 2377 kfree(ias_opt);
2366 return -EINVAL; 2378 err = -EINVAL;
2379 goto out;
2367 } 2380 }
2368 2381
2369 /* Translate from internal to user structure */ 2382 /* Translate from internal to user structure */
2370 err = irda_extract_ias_value(ias_opt, ias_attr->value); 2383 err = irda_extract_ias_value(ias_opt, ias_attr->value);
2371 if(err) { 2384 if(err) {
2372 kfree(ias_opt); 2385 kfree(ias_opt);
2373 return err; 2386 goto out;
2374 } 2387 }
2375 2388
2376 /* Copy reply to the user */ 2389 /* Copy reply to the user */
2377 if (copy_to_user(optval, ias_opt, 2390 if (copy_to_user(optval, ias_opt,
2378 sizeof(struct irda_ias_set))) { 2391 sizeof(struct irda_ias_set))) {
2379 kfree(ias_opt); 2392 kfree(ias_opt);
2380 return -EFAULT; 2393 err = -EFAULT;
2394 goto out;
2381 } 2395 }
2382 /* Note : don't need to put optlen, we checked it */ 2396 /* Note : don't need to put optlen, we checked it */
2383 kfree(ias_opt); 2397 kfree(ias_opt);
@@ -2388,17 +2402,22 @@ bed:
2388 * then wait for the answer to come back. */ 2402 * then wait for the answer to come back. */
2389 2403
2390 /* Check that the user has allocated the right space for us */ 2404 /* Check that the user has allocated the right space for us */
2391 if (len != sizeof(struct irda_ias_set)) 2405 if (len != sizeof(struct irda_ias_set)) {
2392 return -EINVAL; 2406 err = -EINVAL;
2407 goto out;
2408 }
2393 2409
2394 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC); 2410 ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
2395 if (ias_opt == NULL) 2411 if (ias_opt == NULL) {
2396 return -ENOMEM; 2412 err = -ENOMEM;
2413 goto out;
2414 }
2397 2415
2398 /* Copy query to the driver. */ 2416 /* Copy query to the driver. */
2399 if (copy_from_user(ias_opt, optval, len)) { 2417 if (copy_from_user(ias_opt, optval, len)) {
2400 kfree(ias_opt); 2418 kfree(ias_opt);
2401 return -EFAULT; 2419 err = -EFAULT;
2420 goto out;
2402 } 2421 }
2403 2422
2404 /* At this point, there are two cases... 2423 /* At this point, there are two cases...
@@ -2419,7 +2438,8 @@ bed:
2419 daddr = ias_opt->daddr; 2438 daddr = ias_opt->daddr;
2420 if((!daddr) || (daddr == DEV_ADDR_ANY)) { 2439 if((!daddr) || (daddr == DEV_ADDR_ANY)) {
2421 kfree(ias_opt); 2440 kfree(ias_opt);
2422 return -EINVAL; 2441 err = -EINVAL;
2442 goto out;
2423 } 2443 }
2424 } 2444 }
2425 2445
@@ -2428,7 +2448,8 @@ bed:
2428 IRDA_WARNING("%s: busy with a previous query\n", 2448 IRDA_WARNING("%s: busy with a previous query\n",
2429 __func__); 2449 __func__);
2430 kfree(ias_opt); 2450 kfree(ias_opt);
2431 return -EBUSY; 2451 err = -EBUSY;
2452 goto out;
2432 } 2453 }
2433 2454
2434 self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, 2455 self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
@@ -2436,7 +2457,8 @@ bed:
2436 2457
2437 if (self->iriap == NULL) { 2458 if (self->iriap == NULL) {
2438 kfree(ias_opt); 2459 kfree(ias_opt);
2439 return -ENOMEM; 2460 err = -ENOMEM;
2461 goto out;
2440 } 2462 }
2441 2463
2442 /* Treat unexpected wakeup as disconnect */ 2464 /* Treat unexpected wakeup as disconnect */
@@ -2455,7 +2477,8 @@ bed:
2455 * we can free it regardless! */ 2477 * we can free it regardless! */
2456 kfree(ias_opt); 2478 kfree(ias_opt);
2457 /* Treat signals as disconnect */ 2479 /* Treat signals as disconnect */
2458 return -EHOSTUNREACH; 2480 err = -EHOSTUNREACH;
2481 goto out;
2459 } 2482 }
2460 2483
2461 /* Check what happened */ 2484 /* Check what happened */
@@ -2465,9 +2488,11 @@ bed:
2465 /* Requested object/attribute doesn't exist */ 2488 /* Requested object/attribute doesn't exist */
2466 if((self->errno == IAS_CLASS_UNKNOWN) || 2489 if((self->errno == IAS_CLASS_UNKNOWN) ||
2467 (self->errno == IAS_ATTRIB_UNKNOWN)) 2490 (self->errno == IAS_ATTRIB_UNKNOWN))
2468 return (-EADDRNOTAVAIL); 2491 err = -EADDRNOTAVAIL;
2469 else 2492 else
2470 return (-EHOSTUNREACH); 2493 err = -EHOSTUNREACH;
2494
2495 goto out;
2471 } 2496 }
2472 2497
2473 /* Translate from internal to user structure */ 2498 /* Translate from internal to user structure */
@@ -2476,14 +2501,15 @@ bed:
2476 irias_delete_value(self->ias_result); 2501 irias_delete_value(self->ias_result);
2477 if (err) { 2502 if (err) {
2478 kfree(ias_opt); 2503 kfree(ias_opt);
2479 return err; 2504 goto out;
2480 } 2505 }
2481 2506
2482 /* Copy reply to the user */ 2507 /* Copy reply to the user */
2483 if (copy_to_user(optval, ias_opt, 2508 if (copy_to_user(optval, ias_opt,
2484 sizeof(struct irda_ias_set))) { 2509 sizeof(struct irda_ias_set))) {
2485 kfree(ias_opt); 2510 kfree(ias_opt);
2486 return -EFAULT; 2511 err = -EFAULT;
2512 goto out;
2487 } 2513 }
2488 /* Note : don't need to put optlen, we checked it */ 2514 /* Note : don't need to put optlen, we checked it */
2489 kfree(ias_opt); 2515 kfree(ias_opt);
@@ -2504,11 +2530,15 @@ bed:
2504 */ 2530 */
2505 2531
2506 /* Check that the user is passing us an int */ 2532 /* Check that the user is passing us an int */
2507 if (len != sizeof(int)) 2533 if (len != sizeof(int)) {
2508 return -EINVAL; 2534 err = -EINVAL;
2535 goto out;
2536 }
2509 /* Get timeout in ms (max time we block the caller) */ 2537 /* Get timeout in ms (max time we block the caller) */
2510 if (get_user(val, (int __user *)optval)) 2538 if (get_user(val, (int __user *)optval)) {
2511 return -EFAULT; 2539 err = -EFAULT;
2540 goto out;
2541 }
2512 2542
2513 /* Tell IrLMP we want to be notified */ 2543 /* Tell IrLMP we want to be notified */
2514 irlmp_update_client(self->ckey, self->mask.word, 2544 irlmp_update_client(self->ckey, self->mask.word,
@@ -2520,8 +2550,6 @@ bed:
2520 2550
2521 /* Wait until a node is discovered */ 2551 /* Wait until a node is discovered */
2522 if (!self->cachedaddr) { 2552 if (!self->cachedaddr) {
2523 int ret = 0;
2524
2525 IRDA_DEBUG(1, "%s(), nothing discovered yet, going to sleep...\n", __func__); 2553 IRDA_DEBUG(1, "%s(), nothing discovered yet, going to sleep...\n", __func__);
2526 2554
2527 /* Set watchdog timer to expire in <val> ms. */ 2555 /* Set watchdog timer to expire in <val> ms. */
@@ -2534,7 +2562,7 @@ bed:
2534 /* Wait for IR-LMP to call us back */ 2562 /* Wait for IR-LMP to call us back */
2535 __wait_event_interruptible(self->query_wait, 2563 __wait_event_interruptible(self->query_wait,
2536 (self->cachedaddr != 0 || self->errno == -ETIME), 2564 (self->cachedaddr != 0 || self->errno == -ETIME),
2537 ret); 2565 err);
2538 2566
2539 /* If watchdog is still activated, kill it! */ 2567 /* If watchdog is still activated, kill it! */
2540 if(timer_pending(&(self->watchdog))) 2568 if(timer_pending(&(self->watchdog)))
@@ -2542,8 +2570,8 @@ bed:
2542 2570
2543 IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__); 2571 IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__);
2544 2572
2545 if (ret != 0) 2573 if (err != 0)
2546 return ret; 2574 goto out;
2547 } 2575 }
2548 else 2576 else
2549 IRDA_DEBUG(1, "%s(), found immediately !\n", 2577 IRDA_DEBUG(1, "%s(), found immediately !\n",
@@ -2566,25 +2594,19 @@ bed:
2566 * If the user want more details, he should query 2594 * If the user want more details, he should query
2567 * the whole discovery log and pick one device... 2595 * the whole discovery log and pick one device...
2568 */ 2596 */
2569 if (put_user(daddr, (int __user *)optval)) 2597 if (put_user(daddr, (int __user *)optval)) {
2570 return -EFAULT; 2598 err = -EFAULT;
2599 goto out;
2600 }
2571 2601
2572 break; 2602 break;
2573 default: 2603 default:
2574 return -ENOPROTOOPT; 2604 err = -ENOPROTOOPT;
2575 } 2605 }
2576 2606
2577 return 0; 2607out:
2578}
2579
2580static int irda_getsockopt(struct socket *sock, int level, int optname,
2581 char __user *optval, int __user *optlen)
2582{
2583 int err;
2584 2608
2585 lock_kernel(); 2609 release_sock(sk);
2586 err = __irda_getsockopt(sock, level, optname, optval, optlen);
2587 unlock_kernel();
2588 2610
2589 return err; 2611 return err;
2590} 2612}
@@ -2628,7 +2650,7 @@ static const struct proto_ops irda_seqpacket_ops = {
2628 .socketpair = sock_no_socketpair, 2650 .socketpair = sock_no_socketpair,
2629 .accept = irda_accept, 2651 .accept = irda_accept,
2630 .getname = irda_getname, 2652 .getname = irda_getname,
2631 .poll = irda_datagram_poll, 2653 .poll = datagram_poll,
2632 .ioctl = irda_ioctl, 2654 .ioctl = irda_ioctl,
2633#ifdef CONFIG_COMPAT 2655#ifdef CONFIG_COMPAT
2634 .compat_ioctl = irda_compat_ioctl, 2656 .compat_ioctl = irda_compat_ioctl,
@@ -2652,7 +2674,7 @@ static const struct proto_ops irda_dgram_ops = {
2652 .socketpair = sock_no_socketpair, 2674 .socketpair = sock_no_socketpair,
2653 .accept = irda_accept, 2675 .accept = irda_accept,
2654 .getname = irda_getname, 2676 .getname = irda_getname,
2655 .poll = irda_datagram_poll, 2677 .poll = datagram_poll,
2656 .ioctl = irda_ioctl, 2678 .ioctl = irda_ioctl,
2657#ifdef CONFIG_COMPAT 2679#ifdef CONFIG_COMPAT
2658 .compat_ioctl = irda_compat_ioctl, 2680 .compat_ioctl = irda_compat_ioctl,
@@ -2677,7 +2699,7 @@ static const struct proto_ops irda_ultra_ops = {
2677 .socketpair = sock_no_socketpair, 2699 .socketpair = sock_no_socketpair,
2678 .accept = sock_no_accept, 2700 .accept = sock_no_accept,
2679 .getname = irda_getname, 2701 .getname = irda_getname,
2680 .poll = irda_datagram_poll, 2702 .poll = datagram_poll,
2681 .ioctl = irda_ioctl, 2703 .ioctl = irda_ioctl,
2682#ifdef CONFIG_COMPAT 2704#ifdef CONFIG_COMPAT
2683 .compat_ioctl = irda_compat_ioctl, 2705 .compat_ioctl = irda_compat_ioctl,
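
With the lock_kernel() wrapper gone, nothing was left in irda_datagram_poll(), so all three proto_ops tables can point at the generic helper directly. A hypothetical table showing the wiring (the field selection here is illustrative, not the full IrDA set):

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <net/sock.h>

static const struct proto_ops example_dgram_ops = {
	.family	= PF_IRDA,
	.owner	= THIS_MODULE,
	.bind	= sock_no_bind,		/* stubs for unsupported calls */
	.accept	= sock_no_accept,
	.poll	= datagram_poll,	/* generic helper, no BKL wrapper */
	.mmap	= sock_no_mmap,
};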
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index c1c8ae939126..36c3f037f172 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -315,7 +315,7 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
315 315
316 /* Get the actual number of device in the buffer and return */ 316 /* Get the actual number of device in the buffer and return */
317 *pn = i; 317 *pn = i;
318 return(buffer); 318 return buffer;
319} 319}
320 320
321#ifdef CONFIG_PROC_FS 321#ifdef CONFIG_PROC_FS
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index faa82ca2dfdc..a39cca8331df 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -449,8 +449,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
449 } 449 }
450 450
451#ifdef SERIAL_DO_RESTART 451#ifdef SERIAL_DO_RESTART
452 return ((self->flags & ASYNC_HUP_NOTIFY) ? 452 return (self->flags & ASYNC_HUP_NOTIFY) ?
453 -EAGAIN : -ERESTARTSYS); 453 -EAGAIN : -ERESTARTSYS;
454#else 454#else
455 return -EAGAIN; 455 return -EAGAIN;
456#endif 456#endif
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index fce364c6c71a..5b743bdd89ba 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -502,7 +502,8 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self,
502 IRDA_DEBUG(4, "%s(), strlen=%d\n", __func__, value_len); 502 IRDA_DEBUG(4, "%s(), strlen=%d\n", __func__, value_len);
503 503
504 /* Make sure the string is null-terminated */ 504 /* Make sure the string is null-terminated */
505 fp[n+value_len] = 0x00; 505 if (n + value_len < skb->len)
506 fp[n + value_len] = 0x00;
506 IRDA_DEBUG(4, "Got string %s\n", fp+n); 507 IRDA_DEBUG(4, "Got string %s\n", fp+n);
507 508
508 /* Will truncate to IAS_MAX_STRING bytes */ 509 /* Will truncate to IAS_MAX_STRING bytes */
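
The iriap.c hunk is a bounds fix: when the received value runs to the very end of the skb, writing the terminating NUL at fp[n + value_len] would land one byte past the buffer, so the write is now skipped unless it stays inside skb->len. The same guard in isolation (plain C, names invented for the sketch):

#include <stddef.h>

static void terminate_in_place(char *buf, size_t buf_len,
			       size_t off, size_t value_len)
{
	/* off + value_len indexes the byte just past the value; writing
	 * there is only safe while it is still inside the buffer */
	if (off + value_len < buf_len)
		buf[off + value_len] = '\0';
}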
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 5bb8353105cc..8ee1ff6c742f 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev);
45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, 45static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
46 struct net_device *dev); 46 struct net_device *dev);
47static void irlan_eth_set_multicast_list( struct net_device *dev); 47static void irlan_eth_set_multicast_list( struct net_device *dev);
48static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
49 48
50static const struct net_device_ops irlan_eth_netdev_ops = { 49static const struct net_device_ops irlan_eth_netdev_ops = {
51 .ndo_open = irlan_eth_open, 50 .ndo_open = irlan_eth_open,
52 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
53 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
54 .ndo_get_stats = irlan_eth_get_stats,
55 .ndo_set_multicast_list = irlan_eth_set_multicast_list, 53 .ndo_set_multicast_list = irlan_eth_set_multicast_list,
56 .ndo_change_mtu = eth_change_mtu, 54 .ndo_change_mtu = eth_change_mtu,
57 .ndo_validate_addr = eth_validate_addr, 55 .ndo_validate_addr = eth_validate_addr,
@@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
208 * tried :-) DB 206 * tried :-) DB
209 */ 207 */
210 /* irttp_data_request already free the packet */ 208 /* irttp_data_request already free the packet */
211 self->stats.tx_dropped++; 209 dev->stats.tx_dropped++;
212 } else { 210 } else {
213 self->stats.tx_packets++; 211 dev->stats.tx_packets++;
214 self->stats.tx_bytes += len; 212 dev->stats.tx_bytes += len;
215 } 213 }
216 214
217 return NETDEV_TX_OK; 215 return NETDEV_TX_OK;
@@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
226int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) 224int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
227{ 225{
228 struct irlan_cb *self = instance; 226 struct irlan_cb *self = instance;
227 struct net_device *dev = self->dev;
229 228
230 if (skb == NULL) { 229 if (skb == NULL) {
231 ++self->stats.rx_dropped; 230 dev->stats.rx_dropped++;
232 return 0; 231 return 0;
233 } 232 }
234 if (skb->len < ETH_HLEN) { 233 if (skb->len < ETH_HLEN) {
235 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", 234 IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n",
236 __func__, skb->len); 235 __func__, skb->len);
237 ++self->stats.rx_dropped; 236 dev->stats.rx_dropped++;
238 dev_kfree_skb(skb); 237 dev_kfree_skb(skb);
239 return 0; 238 return 0;
240 } 239 }
@@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
244 * might have been previously set by the low level IrDA network 243 * might have been previously set by the low level IrDA network
245 * device driver 244 * device driver
246 */ 245 */
247 skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ 246 skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */
248 247
249 self->stats.rx_packets++; 248 dev->stats.rx_packets++;
250 self->stats.rx_bytes += skb->len; 249 dev->stats.rx_bytes += skb->len;
251 250
252 netif_rx(skb); /* Eat it! */ 251 netif_rx(skb); /* Eat it! */
253 252
@@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
348 else 347 else
349 irlan_set_broadcast_filter(self, FALSE); 348 irlan_set_broadcast_filter(self, FALSE);
350} 349}
351
352/*
353 * Function irlan_get_stats (dev)
354 *
355 * Get the current statistics for this device
356 *
357 */
358static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev)
359{
360 struct irlan_cb *self = netdev_priv(dev);
361
362 return &self->stats;
363}
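
irlan_eth.c stops keeping a private struct net_device_stats in its control block: the counters move to the core-provided dev->stats, which the stack reads by default when a driver supplies no ndo_get_stats hook, so both the accessor and its .ndo_get_stats entry can be deleted. A sketch of the resulting receive-side accounting (illustrative driver code, not the real irlan_eth_receive()):

#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int example_rx(struct net_device *dev, struct sk_buff *skb)
{
	if (skb->len < ETH_HLEN) {
		dev->stats.rx_dropped++;	/* core-owned counters */
		dev_kfree_skb(skb);
		return 0;
	}

	skb->protocol = eth_type_trans(skb, dev);
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += skb->len;

	return netif_rx(skb);
}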
diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c
index cbcb4eb54037..43f16040a6fe 100644
--- a/net/irda/irlan/irlan_event.c
+++ b/net/irda/irlan/irlan_event.c
@@ -24,7 +24,7 @@
24 24
25#include <net/irda/irlan_event.h> 25#include <net/irda/irlan_event.h>
26 26
27char *irlan_state[] = { 27const char * const irlan_state[] = {
28 "IRLAN_IDLE", 28 "IRLAN_IDLE",
29 "IRLAN_QUERY", 29 "IRLAN_QUERY",
30 "IRLAN_CONN", 30 "IRLAN_CONN",
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 0e7d8bde145d..6115a44c0a24 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -939,7 +939,7 @@ struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
939 } 939 }
940 940
941 /* Return current cached discovery log */ 941 /* Return current cached discovery log */
942 return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE)); 942 return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE);
943} 943}
944EXPORT_SYMBOL(irlmp_get_discoveries); 944EXPORT_SYMBOL(irlmp_get_discoveries);
945 945
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
index 3750884094da..062e63b1c5c4 100644
--- a/net/irda/irlmp_frame.c
+++ b/net/irda/irlmp_frame.c
@@ -448,7 +448,7 @@ static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel,
448 (self->cache.slsap_sel == slsap_sel) && 448 (self->cache.slsap_sel == slsap_sel) &&
449 (self->cache.dlsap_sel == dlsap_sel)) 449 (self->cache.dlsap_sel == dlsap_sel))
450 { 450 {
451 return (self->cache.lsap); 451 return self->cache.lsap;
452 } 452 }
453#endif 453#endif
454 454
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 4300df35d37d..0d82ff5aeff1 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -458,6 +458,8 @@ typedef struct irnet_socket
458 int disco_index; /* Last read in the discovery log */ 458 int disco_index; /* Last read in the discovery log */
459 int disco_number; /* Size of the discovery log */ 459 int disco_number; /* Size of the discovery log */
460 460
461 struct mutex lock;
462
461} irnet_socket; 463} irnet_socket;
462 464
463/* 465/*
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index e98e40d76f4f..7f17a8020e8a 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -238,7 +238,7 @@ irnet_ias_to_tsap(irnet_socket * self,
238 DEXIT(IRDA_SR_TRACE, "\n"); 238 DEXIT(IRDA_SR_TRACE, "\n");
239 239
240 /* Return the TSAP */ 240 /* Return the TSAP */
241 return(dtsap_sel); 241 return dtsap_sel;
242} 242}
243 243
244/*------------------------------------------------------------------*/ 244/*------------------------------------------------------------------*/
@@ -301,7 +301,7 @@ irnet_connect_tsap(irnet_socket * self)
301 { 301 {
302 clear_bit(0, &self->ttp_connect); 302 clear_bit(0, &self->ttp_connect);
303 DERROR(IRDA_SR_ERROR, "connect aborted!\n"); 303 DERROR(IRDA_SR_ERROR, "connect aborted!\n");
304 return(err); 304 return err;
305 } 305 }
306 306
307 /* Connect to remote device */ 307 /* Connect to remote device */
@@ -312,7 +312,7 @@ irnet_connect_tsap(irnet_socket * self)
312 { 312 {
313 clear_bit(0, &self->ttp_connect); 313 clear_bit(0, &self->ttp_connect);
314 DERROR(IRDA_SR_ERROR, "connect aborted!\n"); 314 DERROR(IRDA_SR_ERROR, "connect aborted!\n");
315 return(err); 315 return err;
316 } 316 }
317 317
318 /* The above call is non-blocking. 318 /* The above call is non-blocking.
@@ -321,7 +321,7 @@ irnet_connect_tsap(irnet_socket * self)
321 * See you there ;-) */ 321 * See you there ;-) */
322 322
323 DEXIT(IRDA_SR_TRACE, "\n"); 323 DEXIT(IRDA_SR_TRACE, "\n");
324 return(err); 324 return err;
325} 325}
326 326
327/*------------------------------------------------------------------*/ 327/*------------------------------------------------------------------*/
@@ -362,10 +362,10 @@ irnet_discover_next_daddr(irnet_socket * self)
362 /* The above request is non-blocking. 362 /* The above request is non-blocking.
363 * After a while, IrDA will call us back in irnet_discovervalue_confirm() 363 * After a while, IrDA will call us back in irnet_discovervalue_confirm()
364 * We will then call irnet_ias_to_tsap() and come back here again... */ 364 * We will then call irnet_ias_to_tsap() and come back here again... */
365 return(0); 365 return 0;
366 } 366 }
367 else 367 else
368 return(1); 368 return 1;
369} 369}
370 370
371/*------------------------------------------------------------------*/ 371/*------------------------------------------------------------------*/
@@ -436,7 +436,7 @@ irnet_discover_daddr_and_lsap_sel(irnet_socket * self)
436 /* Follow me in irnet_discovervalue_confirm() */ 436 /* Follow me in irnet_discovervalue_confirm() */
437 437
438 DEXIT(IRDA_SR_TRACE, "\n"); 438 DEXIT(IRDA_SR_TRACE, "\n");
439 return(0); 439 return 0;
440} 440}
441 441
442/*------------------------------------------------------------------*/ 442/*------------------------------------------------------------------*/
@@ -485,7 +485,7 @@ irnet_dname_to_daddr(irnet_socket * self)
485 /* No luck ! */ 485 /* No luck ! */
486 DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname); 486 DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname);
487 kfree(discoveries); 487 kfree(discoveries);
488 return(-EADDRNOTAVAIL); 488 return -EADDRNOTAVAIL;
489} 489}
490 490
491 491
@@ -527,7 +527,7 @@ irda_irnet_create(irnet_socket * self)
527 INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect); 527 INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect);
528 528
529 DEXIT(IRDA_SOCK_TRACE, "\n"); 529 DEXIT(IRDA_SOCK_TRACE, "\n");
530 return(0); 530 return 0;
531} 531}
532 532
533/*------------------------------------------------------------------*/ 533/*------------------------------------------------------------------*/
@@ -601,7 +601,7 @@ irda_irnet_connect(irnet_socket * self)
601 * We will finish the connection procedure in irnet_connect_tsap(). 601 * We will finish the connection procedure in irnet_connect_tsap().
602 */ 602 */
603 DEXIT(IRDA_SOCK_TRACE, "\n"); 603 DEXIT(IRDA_SOCK_TRACE, "\n");
604 return(0); 604 return 0;
605} 605}
606 606
607/*------------------------------------------------------------------*/ 607/*------------------------------------------------------------------*/
@@ -733,7 +733,7 @@ irnet_daddr_to_dname(irnet_socket * self)
733 /* No luck ! */ 733 /* No luck ! */
734 DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr); 734 DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr);
735 kfree(discoveries); 735 kfree(discoveries);
736 return(-EADDRNOTAVAIL); 736 return -EADDRNOTAVAIL;
737} 737}
738 738
739/*------------------------------------------------------------------*/ 739/*------------------------------------------------------------------*/
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index dfe7b38dd4af..7fa86373de41 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -166,7 +166,7 @@ irnet_ctrl_write(irnet_socket * ap,
166 } 166 }
167 167
168 /* Success : we have parsed all commands successfully */ 168 /* Success : we have parsed all commands successfully */
169 return(count); 169 return count;
170} 170}
171 171
172#ifdef INITIAL_DISCOVERY 172#ifdef INITIAL_DISCOVERY
@@ -300,7 +300,7 @@ irnet_ctrl_read(irnet_socket * ap,
300 } 300 }
301 301
302 DEXIT(CTRL_TRACE, "\n"); 302 DEXIT(CTRL_TRACE, "\n");
303 return(strlen(event)); 303 return strlen(event);
304 } 304 }
305#endif /* INITIAL_DISCOVERY */ 305#endif /* INITIAL_DISCOVERY */
306 306
@@ -409,7 +409,7 @@ irnet_ctrl_read(irnet_socket * ap,
409 } 409 }
410 410
411 DEXIT(CTRL_TRACE, "\n"); 411 DEXIT(CTRL_TRACE, "\n");
412 return(strlen(event)); 412 return strlen(event);
413} 413}
414 414
415/*------------------------------------------------------------------*/ 415/*------------------------------------------------------------------*/
@@ -480,7 +480,6 @@ dev_irnet_open(struct inode * inode,
480 ap = kzalloc(sizeof(*ap), GFP_KERNEL); 480 ap = kzalloc(sizeof(*ap), GFP_KERNEL);
481 DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n"); 481 DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
482 482
483 lock_kernel();
484 /* initialize the irnet structure */ 483 /* initialize the irnet structure */
485 ap->file = file; 484 ap->file = file;
486 485
@@ -502,18 +501,20 @@ dev_irnet_open(struct inode * inode,
502 { 501 {
503 DERROR(FS_ERROR, "Can't setup IrDA link...\n"); 502 DERROR(FS_ERROR, "Can't setup IrDA link...\n");
504 kfree(ap); 503 kfree(ap);
505 unlock_kernel(); 504
506 return err; 505 return err;
507 } 506 }
508 507
509 /* For the control channel */ 508 /* For the control channel */
510 ap->event_index = irnet_events.index; /* Cancel all past events */ 509 ap->event_index = irnet_events.index; /* Cancel all past events */
511 510
511 mutex_init(&ap->lock);
512
512 /* Put our stuff where we will be able to find it later */ 513 /* Put our stuff where we will be able to find it later */
513 file->private_data = ap; 514 file->private_data = ap;
514 515
515 DEXIT(FS_TRACE, " - ap=0x%p\n", ap); 516 DEXIT(FS_TRACE, " - ap=0x%p\n", ap);
516 unlock_kernel(); 517
517 return 0; 518 return 0;
518} 519}
519 520
@@ -623,7 +624,7 @@ dev_irnet_poll(struct file * file,
623 mask |= irnet_ctrl_poll(ap, file, wait); 624 mask |= irnet_ctrl_poll(ap, file, wait);
624 625
625 DEXIT(FS_TRACE, " - mask=0x%X\n", mask); 626 DEXIT(FS_TRACE, " - mask=0x%X\n", mask);
626 return(mask); 627 return mask;
627} 628}
628 629
629/*------------------------------------------------------------------*/ 630/*------------------------------------------------------------------*/
@@ -663,8 +664,10 @@ dev_irnet_ioctl(
663 if((val == N_SYNC_PPP) || (val == N_PPP)) 664 if((val == N_SYNC_PPP) || (val == N_PPP))
664 { 665 {
665 DEBUG(FS_INFO, "Entering PPP discipline.\n"); 666 DEBUG(FS_INFO, "Entering PPP discipline.\n");
666 /* PPP channel setup (ap->chan in configued in dev_irnet_open())*/ 667 /* PPP channel setup (ap->chan in configured in dev_irnet_open())*/
667 lock_kernel(); 668 if (mutex_lock_interruptible(&ap->lock))
669 return -EINTR;
670
668 err = ppp_register_channel(&ap->chan); 671 err = ppp_register_channel(&ap->chan);
669 if(err == 0) 672 if(err == 0)
670 { 673 {
@@ -677,14 +680,17 @@ dev_irnet_ioctl(
677 } 680 }
678 else 681 else
679 DERROR(FS_ERROR, "Can't setup PPP channel...\n"); 682 DERROR(FS_ERROR, "Can't setup PPP channel...\n");
680 unlock_kernel(); 683
684 mutex_unlock(&ap->lock);
681 } 685 }
682 else 686 else
683 { 687 {
684 /* In theory, should be N_TTY */ 688 /* In theory, should be N_TTY */
685 DEBUG(FS_INFO, "Exiting PPP discipline.\n"); 689 DEBUG(FS_INFO, "Exiting PPP discipline.\n");
686 /* Disconnect from the generic PPP layer */ 690 /* Disconnect from the generic PPP layer */
687 lock_kernel(); 691 if (mutex_lock_interruptible(&ap->lock))
692 return -EINTR;
693
688 if(ap->ppp_open) 694 if(ap->ppp_open)
689 { 695 {
690 ap->ppp_open = 0; 696 ap->ppp_open = 0;
@@ -693,24 +699,31 @@ dev_irnet_ioctl(
693 else 699 else
694 DERROR(FS_ERROR, "Channel not registered !\n"); 700 DERROR(FS_ERROR, "Channel not registered !\n");
695 err = 0; 701 err = 0;
696 unlock_kernel(); 702
703 mutex_unlock(&ap->lock);
697 } 704 }
698 break; 705 break;
699 706
700 /* Query PPP channel and unit number */ 707 /* Query PPP channel and unit number */
701 case PPPIOCGCHAN: 708 case PPPIOCGCHAN:
702 lock_kernel(); 709 if (mutex_lock_interruptible(&ap->lock))
710 return -EINTR;
711
703 if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), 712 if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan),
704 (int __user *)argp)) 713 (int __user *)argp))
705 err = 0; 714 err = 0;
706 unlock_kernel(); 715
716 mutex_unlock(&ap->lock);
707 break; 717 break;
708 case PPPIOCGUNIT: 718 case PPPIOCGUNIT:
709 lock_kernel(); 719 if (mutex_lock_interruptible(&ap->lock))
720 return -EINTR;
721
710 if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), 722 if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan),
711 (int __user *)argp)) 723 (int __user *)argp))
712 err = 0; 724 err = 0;
713 unlock_kernel(); 725
726 mutex_unlock(&ap->lock);
714 break; 727 break;
715 728
716 /* All these ioctls can be passed both directly and from ppp_generic, 729 /* All these ioctls can be passed both directly and from ppp_generic,
@@ -730,9 +743,12 @@ dev_irnet_ioctl(
730 if(!capable(CAP_NET_ADMIN)) 743 if(!capable(CAP_NET_ADMIN))
731 err = -EPERM; 744 err = -EPERM;
732 else { 745 else {
733 lock_kernel(); 746 if (mutex_lock_interruptible(&ap->lock))
747 return -EINTR;
748
734 err = ppp_irnet_ioctl(&ap->chan, cmd, arg); 749 err = ppp_irnet_ioctl(&ap->chan, cmd, arg);
735 unlock_kernel(); 750
751 mutex_unlock(&ap->lock);
736 } 752 }
737 break; 753 break;
738 754
@@ -740,7 +756,9 @@ dev_irnet_ioctl(
740 /* Get termios */ 756 /* Get termios */
741 case TCGETS: 757 case TCGETS:
742 DEBUG(FS_INFO, "Get termios.\n"); 758 DEBUG(FS_INFO, "Get termios.\n");
743 lock_kernel(); 759 if (mutex_lock_interruptible(&ap->lock))
760 return -EINTR;
761
744#ifndef TCGETS2 762#ifndef TCGETS2
745 if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) 763 if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
746 err = 0; 764 err = 0;
@@ -748,12 +766,15 @@ dev_irnet_ioctl(
748 if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios)) 766 if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios))
749 err = 0; 767 err = 0;
750#endif 768#endif
751 unlock_kernel(); 769
770 mutex_unlock(&ap->lock);
752 break; 771 break;
753 /* Set termios */ 772 /* Set termios */
754 case TCSETSF: 773 case TCSETSF:
755 DEBUG(FS_INFO, "Set termios.\n"); 774 DEBUG(FS_INFO, "Set termios.\n");
756 lock_kernel(); 775 if (mutex_lock_interruptible(&ap->lock))
776 return -EINTR;
777
757#ifndef TCGETS2 778#ifndef TCGETS2
758 if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) 779 if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
759 err = 0; 780 err = 0;
@@ -761,7 +782,8 @@ dev_irnet_ioctl(
761 if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) 782 if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp))
762 err = 0; 783 err = 0;
763#endif 784#endif
764 unlock_kernel(); 785
786 mutex_unlock(&ap->lock);
765 break; 787 break;
766 788
767 /* Set DTR/RTS */ 789 /* Set DTR/RTS */
@@ -784,9 +806,10 @@ dev_irnet_ioctl(
784 * We should also worry that we don't accept junk here and that 806 * We should also worry that we don't accept junk here and that
785 * we get rid of our own buffers */ 807 * we get rid of our own buffers */
786#ifdef FLUSH_TO_PPP 808#ifdef FLUSH_TO_PPP
787 lock_kernel(); 809 if (mutex_lock_interruptible(&ap->lock))
810 return -EINTR;
788 ppp_output_wakeup(&ap->chan); 811 ppp_output_wakeup(&ap->chan);
789 unlock_kernel(); 812 mutex_unlock(&ap->lock);
790#endif /* FLUSH_TO_PPP */ 813#endif /* FLUSH_TO_PPP */
791 err = 0; 814 err = 0;
792 break; 815 break;
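
Each lock_kernel() region in the irnet ioctl path becomes a critical section under the new per-socket ap->lock mutex, initialized once in dev_irnet_open(). Taking it with mutex_lock_interruptible() means a signal aborts the wait with -EINTR instead of sleeping uninterruptibly. A reduced sketch of the shape (the struct and handler names are stand-ins):

#include <linux/errno.h>
#include <linux/mutex.h>

struct example_ctx {
	struct mutex lock;	/* mutex_init(&ctx->lock) at open time */
	int ppp_open;
};

static long example_ioctl(struct example_ctx *ap)
{
	long err = -ENOTTY;

	if (mutex_lock_interruptible(&ap->lock))
		return -EINTR;	/* interrupted by a signal */

	if (ap->ppp_open)
		err = 0;	/* ... serialized work here ... */

	mutex_unlock(&ap->lock);
	return err;
}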
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index b5df2418f90c..940225866da0 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -103,7 +103,8 @@ static const struct file_operations irnet_device_fops =
103 .poll = dev_irnet_poll, 103 .poll = dev_irnet_poll,
104 .unlocked_ioctl = dev_irnet_ioctl, 104 .unlocked_ioctl = dev_irnet_ioctl,
105 .open = dev_irnet_open, 105 .open = dev_irnet_open,
106 .release = dev_irnet_close 106 .release = dev_irnet_close,
107 .llseek = noop_llseek,
107 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ 108 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
108}; 109};
109 110
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index fc1a20565e2d..71cd38c1a67f 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -298,6 +298,8 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
298 298
299 p.pi = pi; /* In case handler needs to know */ 299 p.pi = pi; /* In case handler needs to know */
300 p.pl = buf[1]; /* Extract length of value */ 300 p.pl = buf[1]; /* Extract length of value */
301 if (p.pl > 32)
302 p.pl = 32;
301 303
302 IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d\n", __func__, 304 IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d\n", __func__,
303 p.pi, p.pl); 305 p.pi, p.pl);
@@ -318,7 +320,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
318 (__u8) str[0], (__u8) str[1]); 320 (__u8) str[0], (__u8) str[1]);
319 321
320 /* Null terminate string */ 322 /* Null terminate string */
321 str[p.pl+1] = '\0'; 323 str[p.pl] = '\0';
322 324
323 p.pv.c = str; /* Handler will need to take a copy */ 325 p.pv.c = str; /* Handler will need to take a copy */
324 326
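
The parameters.c fix is two-fold: the length byte taken from the wire is clamped so it cannot exceed the local buffer, and the terminating NUL moves from str[p.pl + 1] to str[p.pl], the byte immediately after the copied value. Extracted into a standalone sketch (the buffer size and names are invented here):

#include <string.h>

#define EX_MAX_VAL 32

/* dst must have room for EX_MAX_VAL value bytes plus the NUL */
static void extract_string(char *dst, const unsigned char *src,
			   unsigned char wire_len)
{
	unsigned char pl = wire_len;

	if (pl > EX_MAX_VAL)
		pl = EX_MAX_VAL;	/* never trust a peer-supplied length */

	memcpy(dst, src, pl);
	dst[pl] = '\0';		/* NUL at index pl, not pl + 1 */
}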
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 499c045d6910..f7db676de77d 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1798,7 +1798,8 @@ static void iucv_work_fn(struct work_struct *work)
1798 * Handles external interrupts coming in from CP. 1798 * Handles external interrupts coming in from CP.
1799 * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn(). 1799 * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
1800 */ 1800 */
1801static void iucv_external_interrupt(u16 code) 1801static void iucv_external_interrupt(unsigned int ext_int_code,
1802 unsigned int param32, unsigned long param64)
1802{ 1803{
1803 struct iucv_irq_data *p; 1804 struct iucv_irq_data *p;
1804 struct iucv_irq_list *work; 1805 struct iucv_irq_list *work;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 43040e97c474..d87c22df6f1e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -565,12 +565,12 @@ pfkey_proto2satype(uint16_t proto)
565 565
566static uint8_t pfkey_proto_to_xfrm(uint8_t proto) 566static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
567{ 567{
568 return (proto == IPSEC_PROTO_ANY ? 0 : proto); 568 return proto == IPSEC_PROTO_ANY ? 0 : proto;
569} 569}
570 570
571static uint8_t pfkey_proto_from_xfrm(uint8_t proto) 571static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
572{ 572{
573 return (proto ? proto : IPSEC_PROTO_ANY); 573 return proto ? proto : IPSEC_PROTO_ANY;
574} 574}
575 575
576static inline int pfkey_sockaddr_len(sa_family_t family) 576static inline int pfkey_sockaddr_len(sa_family_t family)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1712af1c7b3f..c64ce0a0bb03 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,10 @@ struct l2tp_net {
111 spinlock_t l2tp_session_hlist_lock; 111 spinlock_t l2tp_session_hlist_lock;
112}; 112};
113 113
114static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
115static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
116static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
117
114static inline struct l2tp_net *l2tp_pernet(struct net *net) 118static inline struct l2tp_net *l2tp_pernet(struct net *net)
115{ 119{
116 BUG_ON(!net); 120 BUG_ON(!net);
@@ -118,6 +122,34 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
118 return net_generic(net, l2tp_net_id); 122 return net_generic(net, l2tp_net_id);
119} 123}
120 124
125
126/* Tunnel reference counts. Incremented per session that is added to
127 * the tunnel.
128 */
129static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
130{
131 atomic_inc(&tunnel->ref_count);
132}
133
134static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
135{
136 if (atomic_dec_and_test(&tunnel->ref_count))
137 l2tp_tunnel_free(tunnel);
138}
139#ifdef L2TP_REFCNT_DEBUG
140#define l2tp_tunnel_inc_refcount(_t) do { \
141 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
142 l2tp_tunnel_inc_refcount_1(_t); \
143 } while (0)
144#define l2tp_tunnel_dec_refcount(_t) do { \
145 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
146 l2tp_tunnel_dec_refcount_1(_t); \
147 } while (0)
148#else
149#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
150#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
151#endif
152
121/* Session hash global list for L2TPv3. 153/* Session hash global list for L2TPv3.
122 * The session_id SHOULD be random according to RFC3931, but several 154 * The session_id SHOULD be random according to RFC3931, but several
123 * L2TP implementations use incrementing session_ids. So we do a real 155 * L2TP implementations use incrementing session_ids. So we do a real
@@ -699,8 +731,8 @@ EXPORT_SYMBOL(l2tp_recv_common);
699 * Returns 1 if the packet was not a good data packet and could not be 731 * Returns 1 if the packet was not a good data packet and could not be
700 * forwarded. All such packets are passed up to userspace to deal with. 732 * forwarded. All such packets are passed up to userspace to deal with.
701 */ 733 */
702int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, 734static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
703 int (*payload_hook)(struct sk_buff *skb)) 735 int (*payload_hook)(struct sk_buff *skb))
704{ 736{
705 struct l2tp_session *session = NULL; 737 struct l2tp_session *session = NULL;
706 unsigned char *ptr, *optr; 738 unsigned char *ptr, *optr;
@@ -812,7 +844,6 @@ error:
812 844
813 return 1; 845 return 1;
814} 846}
815EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
816 847
817/* UDP encapsulation receive handler. See net/ipv4/udp.c. 848/* UDP encapsulation receive handler. See net/ipv4/udp.c.
818 * Return codes: 849 * Return codes:
@@ -922,7 +953,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
922 return bufp - optr; 953 return bufp - optr;
923} 954}
924 955
925int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len) 956static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
957 size_t data_len)
926{ 958{
927 struct l2tp_tunnel *tunnel = session->tunnel; 959 struct l2tp_tunnel *tunnel = session->tunnel;
928 unsigned int len = skb->len; 960 unsigned int len = skb->len;
@@ -970,7 +1002,6 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
970 1002
971 return 0; 1003 return 0;
972} 1004}
973EXPORT_SYMBOL_GPL(l2tp_xmit_core);
974 1005
975/* Automatically called when the skb is freed. 1006/* Automatically called when the skb is freed.
976 */ 1007 */
@@ -1089,7 +1120,7 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
1089 * The tunnel context is deleted only when all session sockets have been 1120 * The tunnel context is deleted only when all session sockets have been
1090 * closed. 1121 * closed.
1091 */ 1122 */
1092void l2tp_tunnel_destruct(struct sock *sk) 1123static void l2tp_tunnel_destruct(struct sock *sk)
1093{ 1124{
1094 struct l2tp_tunnel *tunnel; 1125 struct l2tp_tunnel *tunnel;
1095 1126
@@ -1128,11 +1159,10 @@ void l2tp_tunnel_destruct(struct sock *sk)
1128end: 1159end:
1129 return; 1160 return;
1130} 1161}
1131EXPORT_SYMBOL(l2tp_tunnel_destruct);
1132 1162
1133/* When the tunnel is closed, all the attached sessions need to go too. 1163/* When the tunnel is closed, all the attached sessions need to go too.
1134 */ 1164 */
1135void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) 1165static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
1136{ 1166{
1137 int hash; 1167 int hash;
1138 struct hlist_node *walk; 1168 struct hlist_node *walk;
@@ -1193,12 +1223,11 @@ again:
1193 } 1223 }
1194 write_unlock_bh(&tunnel->hlist_lock); 1224 write_unlock_bh(&tunnel->hlist_lock);
1195} 1225}
1196EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
1197 1226
1198/* Really kill the tunnel. 1227/* Really kill the tunnel.
1199 * Come here only when all sessions have been cleared from the tunnel. 1228 * Come here only when all sessions have been cleared from the tunnel.
1200 */ 1229 */
1201void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) 1230static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1202{ 1231{
1203 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); 1232 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1204 1233
@@ -1217,7 +1246,6 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1217 atomic_dec(&l2tp_tunnel_count); 1246 atomic_dec(&l2tp_tunnel_count);
1218 kfree(tunnel); 1247 kfree(tunnel);
1219} 1248}
1220EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
1221 1249
1222/* Create a socket for the tunnel, if one isn't set up by 1250/* Create a socket for the tunnel, if one isn't set up by
1223 * userspace. This is used for static tunnels where there is no 1251 * userspace. This is used for static tunnels where there is no
@@ -1512,7 +1540,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete);
1512/* We come here whenever a session's send_seq, cookie_len or 1540/* We come here whenever a session's send_seq, cookie_len or
1513 * l2specific_len parameters are set. 1541 * l2specific_len parameters are set.
1514 */ 1542 */
1515void l2tp_session_set_header_len(struct l2tp_session *session, int version) 1543static void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1516{ 1544{
1517 if (version == L2TP_HDR_VER_2) { 1545 if (version == L2TP_HDR_VER_2) {
1518 session->hdr_len = 6; 1546 session->hdr_len = 6;
@@ -1525,7 +1553,6 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1525 } 1553 }
1526 1554
1527} 1555}
1528EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
1529 1556
1530struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) 1557struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
1531{ 1558{
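
The l2tp_core.c changes all shrink the module's surface: functions used only inside the file become static, their EXPORT_SYMBOL lines go away, and forward declarations at the top resolve the ordering (l2tp_tunnel_free() is needed by the refcount helpers that now live here too). A compressed sketch of the pattern, with stand-in names:

#include <linux/atomic.h>
#include <linux/slab.h>

struct example_tunnel {
	atomic_t ref_count;
};

/* forward declaration replaces the old header prototype */
static void example_tunnel_free(struct example_tunnel *t);

static inline void example_tunnel_hold(struct example_tunnel *t)
{
	atomic_inc(&t->ref_count);
}

static inline void example_tunnel_put(struct example_tunnel *t)
{
	/* free exactly once, when the last reference is dropped */
	if (atomic_dec_and_test(&t->ref_count))
		example_tunnel_free(t);
}

static void example_tunnel_free(struct example_tunnel *t)
{
	kfree(t);
}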
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index f0f318edd3f1..a16a48e79fab 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -231,48 +231,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); 231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); 232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
233extern int l2tp_session_delete(struct l2tp_session *session); 233extern int l2tp_session_delete(struct l2tp_session *session);
234extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
235extern void l2tp_session_free(struct l2tp_session *session); 234extern void l2tp_session_free(struct l2tp_session *session);
236extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); 235extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
237extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
238extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); 236extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
239 237
240extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
241extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); 238extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
242extern void l2tp_tunnel_destruct(struct sock *sk);
243extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
244extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
245 239
246extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); 240extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
247extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); 241extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
248 242
249/* Tunnel reference counts. Incremented per session that is added to
250 * the tunnel.
251 */
252static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
253{
254 atomic_inc(&tunnel->ref_count);
255}
256
257static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
258{
259 if (atomic_dec_and_test(&tunnel->ref_count))
260 l2tp_tunnel_free(tunnel);
261}
262#ifdef L2TP_REFCNT_DEBUG
263#define l2tp_tunnel_inc_refcount(_t) do { \
264 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
265 l2tp_tunnel_inc_refcount_1(_t); \
266 } while (0)
267#define l2tp_tunnel_dec_refcount(_t) do { \
268 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
269 l2tp_tunnel_dec_refcount_1(_t); \
270 } while (0)
271#else
272#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
273#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
274#endif
275
276/* Session reference counts. Incremented when code obtains a reference 243/* Session reference counts. Incremented when code obtains a reference
277 * to a session. 244 * to a session.
278 */ 245 */
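Note on the header hunk above: it removes l2tp_tunnel_free(), l2tp_udp_recv_core(), l2tp_xmit_core(), l2tp_tunnel_destruct(), l2tp_tunnel_closeall() and l2tp_session_set_header_len() from the public interface, and with them the tunnel refcount helpers, making the refcounting an implementation detail of l2tp_core.c. The release pattern the removed helpers implement is the standard atomic one: the last reference out frees the object. A minimal sketch, assuming kernel context and the field names shown in the hunk:

static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
{
	atomic_inc(&tunnel->ref_count);
}

static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
{
	/* atomic_dec_and_test() returns true only on the 1 -> 0 drop,
	 * so exactly one path ends up freeing the tunnel */
	if (atomic_dec_and_test(&tunnel->ref_count))
		l2tp_tunnel_free(tunnel);
}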
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 104ec3b283d4..b8dbae82fab8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -249,7 +249,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
249 struct seq_file *seq; 249 struct seq_file *seq;
250 int rc = -ENOMEM; 250 int rc = -ENOMEM;
251 251
252 pd = kzalloc(GFP_KERNEL, sizeof(*pd)); 252 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
253 if (pd == NULL) 253 if (pd == NULL)
254 goto out; 254 goto out;
255 255
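The one-line change above fixes transposed arguments: kzalloc() takes the size first and the GFP flags second, so kzalloc(GFP_KERNEL, sizeof(*pd)) silently allocated a buffer whose size was the numeric value of GFP_KERNEL and passed sizeof(*pd) as allocation flags. The corrected idiom, as a sketch (struct name assumed from the file's pd variable; kernel <linux/slab.h> context assumed):

static struct l2tp_dfs_seq_data *example_alloc_pd(void)
{
	struct l2tp_dfs_seq_data *pd;

	/* size first, GFP flags second; the swapped call compiled
	 * without warning because both arguments are integer-typed */
	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	return pd;	/* NULL on failure, as checked above */
}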
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 1ae697681bc7..8d9ce0accc98 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -144,7 +144,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
144 nf_reset(skb); 144 nf_reset(skb);
145 145
146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { 146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
147 dev->last_rx = jiffies;
148 dev->stats.rx_packets++; 147 dev->stats.rx_packets++;
149 dev->stats.rx_bytes += data_len; 148 dev->stats.rx_bytes += data_len;
150 } else 149 } else
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 226a0ae3bcfd..0bf6a59545ab 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -65,9 +65,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif
65 continue; 65 continue;
66 66
67 if ((l2tp->conn_id == tunnel_id) && 67 if ((l2tp->conn_id == tunnel_id) &&
68#ifdef CONFIG_NET_NS 68 net_eq(sock_net(sk), net) &&
69 (sk->sk_net == net) &&
70#endif
71 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && 69 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
72 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 70 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
73 goto found; 71 goto found;
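The hunk above replaces the open-coded, #ifdef CONFIG_NET_NS-guarded sk->sk_net comparison with net_eq(sock_net(sk), net), keeping the namespace check out of the preprocessor. net_eq() compares the two struct net pointers when namespaces are compiled in and collapses to a constant 1 otherwise, so the !CONFIG_NET_NS build still optimizes the test away. Roughly, simplified from include/net/net_namespace.h of this era:

#ifdef CONFIG_NET_NS
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return net1 == net2;
}
#else
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return 1;
}
#endif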
@@ -578,7 +576,7 @@ out:
578 return copied; 576 return copied;
579} 577}
580 578
581struct proto l2tp_ip_prot = { 579static struct proto l2tp_ip_prot = {
582 .name = "L2TP/IP", 580 .name = "L2TP/IP",
583 .owner = THIS_MODULE, 581 .owner = THIS_MODULE,
584 .init = l2tp_ip_open, 582 .init = l2tp_ip_open,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index ff954b3e94b6..39a21d0c61c4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1768,7 +1768,7 @@ static const struct proto_ops pppol2tp_ops = {
1768 .ioctl = pppox_ioctl, 1768 .ioctl = pppox_ioctl,
1769}; 1769};
1770 1770
1771static struct pppox_proto pppol2tp_proto = { 1771static const struct pppox_proto pppol2tp_proto = {
1772 .create = pppol2tp_create, 1772 .create = pppol2tp_create,
1773 .ioctl = pppol2tp_ioctl 1773 .ioctl = pppol2tp_ioctl
1774}; 1774};
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index a87cb3ba2df6..d2b03e0851ef 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -138,10 +138,8 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
138 struct crypto_cipher *tfm; 138 struct crypto_cipher *tfm;
139 139
140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
141 if (IS_ERR(tfm)) 141 if (!IS_ERR(tfm))
142 return NULL; 142 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
143
144 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
145 143
146 return tfm; 144 return tfm;
147} 145}
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 3d097b3d7b62..b4d66cca76d6 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -119,10 +119,8 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
119 struct crypto_cipher *tfm; 119 struct crypto_cipher *tfm;
120 120
121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(tfm)) 122 if (!IS_ERR(tfm))
123 return NULL; 123 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
124
125 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
126 124
127 return tfm; 125 return tfm;
128} 126}
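Both AES setup hunks above (aes_ccm.c and aes_cmac.c) change the failure convention: instead of flattening a crypto_alloc_cipher() error into NULL, the functions now return the ERR_PTR itself and only call crypto_cipher_setkey() on success. Callers therefore have to test with IS_ERR() rather than a NULL check. A caller-side sketch, with function and field names assumed rather than taken from this patch (<linux/err.h> context assumed):

static int example_init_ccmp_key(struct ieee80211_key *key,
				 const u8 key_data[])
{
	struct crypto_cipher *tfm;

	tfm = ieee80211_aes_key_setup_encrypt(key_data);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);	/* e.g. -ENOMEM from the crypto layer */

	key->u.ccmp.tfm = tfm;		/* illustrative destination field */
	return 0;
}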
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 965b272499fd..720b7a84af59 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -56,7 +56,7 @@ static void ieee80211_free_tid_rx(struct rcu_head *h)
56} 56}
57 57
58void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 58void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
59 u16 initiator, u16 reason) 59 u16 initiator, u16 reason, bool tx)
60{ 60{
61 struct ieee80211_local *local = sta->local; 61 struct ieee80211_local *local = sta->local;
62 struct tid_ampdu_rx *tid_rx; 62 struct tid_ampdu_rx *tid_rx;
@@ -81,20 +81,21 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
81 "aggregation for tid %d\n", tid); 81 "aggregation for tid %d\n", tid);
82 82
83 /* check if this is a self generated aggregation halt */ 83 /* check if this is a self generated aggregation halt */
84 if (initiator == WLAN_BACK_RECIPIENT) 84 if (initiator == WLAN_BACK_RECIPIENT && tx)
85 ieee80211_send_delba(sta->sdata, sta->sta.addr, 85 ieee80211_send_delba(sta->sdata, sta->sta.addr,
86 tid, 0, reason); 86 tid, 0, reason);
87 87
88 del_timer_sync(&tid_rx->session_timer); 88 del_timer_sync(&tid_rx->session_timer);
89 del_timer_sync(&tid_rx->reorder_timer);
89 90
90 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); 91 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
91} 92}
92 93
93void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 94void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
94 u16 initiator, u16 reason) 95 u16 initiator, u16 reason, bool tx)
95{ 96{
96 mutex_lock(&sta->ampdu_mlme.mtx); 97 mutex_lock(&sta->ampdu_mlme.mtx);
97 ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); 98 ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, tx);
98 mutex_unlock(&sta->ampdu_mlme.mtx); 99 mutex_unlock(&sta->ampdu_mlme.mtx);
99} 100}
100 101
@@ -120,6 +121,20 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
120 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); 121 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
121} 122}
122 123
124static void sta_rx_agg_reorder_timer_expired(unsigned long data)
125{
126 u8 *ptid = (u8 *)data;
127 u8 *timer_to_id = ptid - *ptid;
128 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
129 timer_to_tid[0]);
130
131 rcu_read_lock();
132 spin_lock(&sta->lock);
133 ieee80211_release_reorder_timeout(sta, *ptid);
134 spin_unlock(&sta->lock);
135 rcu_read_unlock();
136}
137
123static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, 138static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
124 u8 dialog_token, u16 status, u16 policy, 139 u8 dialog_token, u16 status, u16 policy,
125 u16 buf_size, u16 timeout) 140 u16 buf_size, u16 timeout)
@@ -251,11 +266,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
251 goto end; 266 goto end;
252 } 267 }
253 268
269 spin_lock_init(&tid_agg_rx->reorder_lock);
270
254 /* rx timer */ 271 /* rx timer */
255 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; 272 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
256 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; 273 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
257 init_timer(&tid_agg_rx->session_timer); 274 init_timer(&tid_agg_rx->session_timer);
258 275
276 /* rx reorder timer */
277 tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired;
278 tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid];
279 init_timer(&tid_agg_rx->reorder_timer);
280
259 /* prepare reordering buffer */ 281 /* prepare reordering buffer */
260 tid_agg_rx->reorder_buf = 282 tid_agg_rx->reorder_buf =
261 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); 283 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
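The new sta_rx_agg_reorder_timer_expired() above packs two values into the single unsigned long a timer carries: data points at &sta->timer_to_tid[tid], and since timer_to_tid[i] holds the value i, subtracting the stored value from the pointer lands back on element 0, from which container_of() recovers the sta_info. A standalone userspace illustration of the trick (struct layout invented for the demo; the kernel initializes timer_to_tid[i] = i at station allocation):

#include <stdio.h>
#include <stddef.h>

struct sta_info {
	int some_state;
	unsigned char timer_to_tid[16];	/* slot i holds i */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct sta_info sta;
	unsigned char *ptid, *base;
	struct sta_info *recovered;
	int i;

	for (i = 0; i < 16; i++)
		sta.timer_to_tid[i] = i;

	ptid = &sta.timer_to_tid[9];	/* what the timer's data points at */
	base = ptid - *ptid;		/* back to timer_to_tid[0] */
	recovered = container_of(base, struct sta_info, timer_to_tid);

	printf("tid=%d, recovered ok: %d\n", *ptid, recovered == &sta);
	return 0;
}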
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 8f23401832b7..d4679b265ba8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -145,7 +145,8 @@ static void kfree_tid_tx(struct rcu_head *rcu_head)
145} 145}
146 146
147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
148 enum ieee80211_back_parties initiator) 148 enum ieee80211_back_parties initiator,
149 bool tx)
149{ 150{
150 struct ieee80211_local *local = sta->local; 151 struct ieee80211_local *local = sta->local;
151 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; 152 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid];
@@ -185,6 +186,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
185 clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); 186 clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
186 187
187 tid_tx->stop_initiator = initiator; 188 tid_tx->stop_initiator = initiator;
189 tid_tx->tx_stop = tx;
188 190
189 ret = drv_ampdu_action(local, sta->sdata, 191 ret = drv_ampdu_action(local, sta->sdata,
190 IEEE80211_AMPDU_TX_STOP, 192 IEEE80211_AMPDU_TX_STOP,
@@ -577,13 +579,14 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
577EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); 579EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
578 580
579int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 581int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
580 enum ieee80211_back_parties initiator) 582 enum ieee80211_back_parties initiator,
583 bool tx)
581{ 584{
582 int ret; 585 int ret;
583 586
584 mutex_lock(&sta->ampdu_mlme.mtx); 587 mutex_lock(&sta->ampdu_mlme.mtx);
585 588
586 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); 589 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator, tx);
587 590
588 mutex_unlock(&sta->ampdu_mlme.mtx); 591 mutex_unlock(&sta->ampdu_mlme.mtx);
589 592
@@ -672,7 +675,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
672 goto unlock_sta; 675 goto unlock_sta;
673 } 676 }
674 677
675 if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR) 678 if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR && tid_tx->tx_stop)
676 ieee80211_send_delba(sta->sdata, ra, tid, 679 ieee80211_send_delba(sta->sdata, ra, tid,
677 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); 680 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
678 681
@@ -772,7 +775,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
772 775
773 sta->ampdu_mlme.addba_req_num[tid] = 0; 776 sta->ampdu_mlme.addba_req_num[tid] = 0;
774 } else { 777 } else {
775 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); 778 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR,
779 true);
776 } 780 }
777 781
778 out: 782 out:
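The bool tx threaded through every BA stop path in agg-rx.c and agg-tx.c controls one decision: whether tearing down the session also transmits a DELBA frame to the peer. Call sites reacting to the peer's own DELBA, or cleaning up local state, can now stop the session silently with tx == false. The guard, condensed from the rx-side hunk:

	/* only notify the peer when we are the recipient-side initiator
	 * of the stop and the caller asked for a frame */
	if (initiator == WLAN_BACK_RECIPIENT && tx)
		ieee80211_send_delba(sta->sdata, sta->sta.addr,
				     tid, 0, reason);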
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 29ac8e1a509e..18bd0e550600 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -19,33 +19,6 @@
19#include "rate.h" 19#include "rate.h"
20#include "mesh.h" 20#include "mesh.h"
21 21
22static bool nl80211_type_check(enum nl80211_iftype type)
23{
24 switch (type) {
25 case NL80211_IFTYPE_ADHOC:
26 case NL80211_IFTYPE_STATION:
27 case NL80211_IFTYPE_MONITOR:
28#ifdef CONFIG_MAC80211_MESH
29 case NL80211_IFTYPE_MESH_POINT:
30#endif
31 case NL80211_IFTYPE_AP:
32 case NL80211_IFTYPE_AP_VLAN:
33 case NL80211_IFTYPE_WDS:
34 return true;
35 default:
36 return false;
37 }
38}
39
40static bool nl80211_params_check(enum nl80211_iftype type,
41 struct vif_params *params)
42{
43 if (!nl80211_type_check(type))
44 return false;
45
46 return true;
47}
48
49static int ieee80211_add_iface(struct wiphy *wiphy, char *name, 22static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
50 enum nl80211_iftype type, u32 *flags, 23 enum nl80211_iftype type, u32 *flags,
51 struct vif_params *params) 24 struct vif_params *params)
@@ -55,9 +28,6 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
55 struct ieee80211_sub_if_data *sdata; 28 struct ieee80211_sub_if_data *sdata;
56 int err; 29 int err;
57 30
58 if (!nl80211_params_check(type, params))
59 return -EINVAL;
60
61 err = ieee80211_if_add(local, name, &dev, type, params); 31 err = ieee80211_if_add(local, name, &dev, type, params);
62 if (err || type != NL80211_IFTYPE_MONITOR || !flags) 32 if (err || type != NL80211_IFTYPE_MONITOR || !flags)
63 return err; 33 return err;
@@ -82,12 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
82 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 52 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
83 int ret; 53 int ret;
84 54
85 if (ieee80211_sdata_running(sdata))
86 return -EBUSY;
87
88 if (!nl80211_params_check(type, params))
89 return -EINVAL;
90
91 ret = ieee80211_if_change_type(sdata, type); 55 ret = ieee80211_if_change_type(sdata, type);
92 if (ret) 56 if (ret)
93 return ret; 57 return ret;
@@ -104,54 +68,71 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
104 params && params->use_4addr >= 0) 68 params && params->use_4addr >= 0)
105 sdata->u.mgd.use_4addr = params->use_4addr; 69 sdata->u.mgd.use_4addr = params->use_4addr;
106 70
107 if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) 71 if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) {
108 sdata->u.mntr_flags = *flags; 72 struct ieee80211_local *local = sdata->local;
73
74 if (ieee80211_sdata_running(sdata)) {
75 /*
76 * Prohibit MONITOR_FLAG_COOK_FRAMES to be
77 * changed while the interface is up.
78 * Else we would need to add a lot of cruft
79 * to update everything:
80 * cooked_mntrs, monitor and all fif_* counters
81 * reconfigure hardware
82 */
83 if ((*flags & MONITOR_FLAG_COOK_FRAMES) !=
84 (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES))
85 return -EBUSY;
86
87 ieee80211_adjust_monitor_flags(sdata, -1);
88 sdata->u.mntr_flags = *flags;
89 ieee80211_adjust_monitor_flags(sdata, 1);
90
91 ieee80211_configure_filter(local);
92 } else {
93 /*
94 * Because the interface is down, ieee80211_do_stop
95 * and ieee80211_do_open take care of "everything"
96 * mentioned in the comment above.
97 */
98 sdata->u.mntr_flags = *flags;
99 }
100 }
109 101
110 return 0; 102 return 0;
111} 103}
112 104
113static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, 105static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
114 u8 key_idx, const u8 *mac_addr, 106 u8 key_idx, bool pairwise, const u8 *mac_addr,
115 struct key_params *params) 107 struct key_params *params)
116{ 108{
117 struct ieee80211_sub_if_data *sdata; 109 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
118 struct sta_info *sta = NULL; 110 struct sta_info *sta = NULL;
119 enum ieee80211_key_alg alg;
120 struct ieee80211_key *key; 111 struct ieee80211_key *key;
121 int err; 112 int err;
122 113
123 if (!netif_running(dev)) 114 if (!ieee80211_sdata_running(sdata))
124 return -ENETDOWN; 115 return -ENETDOWN;
125 116
126 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 117 /* reject WEP and TKIP keys if WEP failed to initialize */
127
128 switch (params->cipher) { 118 switch (params->cipher) {
129 case WLAN_CIPHER_SUITE_WEP40: 119 case WLAN_CIPHER_SUITE_WEP40:
130 case WLAN_CIPHER_SUITE_WEP104:
131 alg = ALG_WEP;
132 break;
133 case WLAN_CIPHER_SUITE_TKIP: 120 case WLAN_CIPHER_SUITE_TKIP:
134 alg = ALG_TKIP; 121 case WLAN_CIPHER_SUITE_WEP104:
135 break; 122 if (IS_ERR(sdata->local->wep_tx_tfm))
136 case WLAN_CIPHER_SUITE_CCMP: 123 return -EINVAL;
137 alg = ALG_CCMP;
138 break;
139 case WLAN_CIPHER_SUITE_AES_CMAC:
140 alg = ALG_AES_CMAC;
141 break; 124 break;
142 default: 125 default:
143 return -EINVAL; 126 break;
144 } 127 }
145 128
146 /* reject WEP and TKIP keys if WEP failed to initialize */ 129 key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len,
147 if ((alg == ALG_WEP || alg == ALG_TKIP) && 130 params->key, params->seq_len, params->seq);
148 IS_ERR(sdata->local->wep_tx_tfm)) 131 if (IS_ERR(key))
149 return -EINVAL; 132 return PTR_ERR(key);
150 133
151 key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, 134 if (pairwise)
152 params->seq_len, params->seq); 135 key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
153 if (!key)
154 return -ENOMEM;
155 136
156 mutex_lock(&sdata->local->sta_mtx); 137 mutex_lock(&sdata->local->sta_mtx);
157 138
@@ -164,9 +145,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
164 } 145 }
165 } 146 }
166 147
167 ieee80211_key_link(key, sdata, sta); 148 err = ieee80211_key_link(key, sdata, sta);
149 if (err)
150 ieee80211_key_free(sdata->local, key);
168 151
169 err = 0;
170 out_unlock: 152 out_unlock:
171 mutex_unlock(&sdata->local->sta_mtx); 153 mutex_unlock(&sdata->local->sta_mtx);
172 154
@@ -174,7 +156,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
174} 156}
175 157
176static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, 158static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
177 u8 key_idx, const u8 *mac_addr) 159 u8 key_idx, bool pairwise, const u8 *mac_addr)
178{ 160{
179 struct ieee80211_sub_if_data *sdata; 161 struct ieee80211_sub_if_data *sdata;
180 struct sta_info *sta; 162 struct sta_info *sta;
@@ -191,10 +173,17 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
191 if (!sta) 173 if (!sta)
192 goto out_unlock; 174 goto out_unlock;
193 175
194 if (sta->key) { 176 if (pairwise) {
195 ieee80211_key_free(sdata->local, sta->key); 177 if (sta->ptk) {
196 WARN_ON(sta->key); 178 ieee80211_key_free(sdata->local, sta->ptk);
197 ret = 0; 179 ret = 0;
180 }
181 } else {
182 if (sta->gtk[key_idx]) {
183 ieee80211_key_free(sdata->local,
184 sta->gtk[key_idx]);
185 ret = 0;
186 }
198 } 187 }
199 188
200 goto out_unlock; 189 goto out_unlock;
@@ -216,7 +205,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
216} 205}
217 206
218static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, 207static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
219 u8 key_idx, const u8 *mac_addr, void *cookie, 208 u8 key_idx, bool pairwise, const u8 *mac_addr,
209 void *cookie,
220 void (*callback)(void *cookie, 210 void (*callback)(void *cookie,
221 struct key_params *params)) 211 struct key_params *params))
222{ 212{
@@ -224,7 +214,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
224 struct sta_info *sta = NULL; 214 struct sta_info *sta = NULL;
225 u8 seq[6] = {0}; 215 u8 seq[6] = {0};
226 struct key_params params; 216 struct key_params params;
227 struct ieee80211_key *key; 217 struct ieee80211_key *key = NULL;
228 u32 iv32; 218 u32 iv32;
229 u16 iv16; 219 u16 iv16;
230 int err = -ENOENT; 220 int err = -ENOENT;
@@ -238,7 +228,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
238 if (!sta) 228 if (!sta)
239 goto out; 229 goto out;
240 230
241 key = sta->key; 231 if (pairwise)
232 key = sta->ptk;
233 else if (key_idx < NUM_DEFAULT_KEYS)
234 key = sta->gtk[key_idx];
242 } else 235 } else
243 key = sdata->keys[key_idx]; 236 key = sdata->keys[key_idx];
244 237
@@ -247,10 +240,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
247 240
248 memset(&params, 0, sizeof(params)); 241 memset(&params, 0, sizeof(params));
249 242
250 switch (key->conf.alg) { 243 params.cipher = key->conf.cipher;
251 case ALG_TKIP:
252 params.cipher = WLAN_CIPHER_SUITE_TKIP;
253 244
245 switch (key->conf.cipher) {
246 case WLAN_CIPHER_SUITE_TKIP:
254 iv32 = key->u.tkip.tx.iv32; 247 iv32 = key->u.tkip.tx.iv32;
255 iv16 = key->u.tkip.tx.iv16; 248 iv16 = key->u.tkip.tx.iv16;
256 249
@@ -268,8 +261,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
268 params.seq = seq; 261 params.seq = seq;
269 params.seq_len = 6; 262 params.seq_len = 6;
270 break; 263 break;
271 case ALG_CCMP: 264 case WLAN_CIPHER_SUITE_CCMP:
272 params.cipher = WLAN_CIPHER_SUITE_CCMP;
273 seq[0] = key->u.ccmp.tx_pn[5]; 265 seq[0] = key->u.ccmp.tx_pn[5];
274 seq[1] = key->u.ccmp.tx_pn[4]; 266 seq[1] = key->u.ccmp.tx_pn[4];
275 seq[2] = key->u.ccmp.tx_pn[3]; 267 seq[2] = key->u.ccmp.tx_pn[3];
@@ -279,14 +271,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
279 params.seq = seq; 271 params.seq = seq;
280 params.seq_len = 6; 272 params.seq_len = 6;
281 break; 273 break;
282 case ALG_WEP: 274 case WLAN_CIPHER_SUITE_AES_CMAC:
283 if (key->conf.keylen == 5)
284 params.cipher = WLAN_CIPHER_SUITE_WEP40;
285 else
286 params.cipher = WLAN_CIPHER_SUITE_WEP104;
287 break;
288 case ALG_AES_CMAC:
289 params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
290 seq[0] = key->u.aes_cmac.tx_pn[5]; 275 seq[0] = key->u.aes_cmac.tx_pn[5];
291 seq[1] = key->u.aes_cmac.tx_pn[4]; 276 seq[1] = key->u.aes_cmac.tx_pn[4];
292 seq[2] = key->u.aes_cmac.tx_pn[3]; 277 seq[2] = key->u.aes_cmac.tx_pn[3];
@@ -342,13 +327,19 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
342 STATION_INFO_TX_BYTES | 327 STATION_INFO_TX_BYTES |
343 STATION_INFO_RX_PACKETS | 328 STATION_INFO_RX_PACKETS |
344 STATION_INFO_TX_PACKETS | 329 STATION_INFO_TX_PACKETS |
345 STATION_INFO_TX_BITRATE; 330 STATION_INFO_TX_RETRIES |
331 STATION_INFO_TX_FAILED |
332 STATION_INFO_TX_BITRATE |
333 STATION_INFO_RX_DROP_MISC;
346 334
347 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); 335 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
348 sinfo->rx_bytes = sta->rx_bytes; 336 sinfo->rx_bytes = sta->rx_bytes;
349 sinfo->tx_bytes = sta->tx_bytes; 337 sinfo->tx_bytes = sta->tx_bytes;
350 sinfo->rx_packets = sta->rx_packets; 338 sinfo->rx_packets = sta->rx_packets;
351 sinfo->tx_packets = sta->tx_packets; 339 sinfo->tx_packets = sta->tx_packets;
340 sinfo->tx_retries = sta->tx_retry_count;
341 sinfo->tx_failed = sta->tx_retry_failed;
342 sinfo->rx_dropped_misc = sta->rx_dropped;
352 343
353 if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || 344 if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
354 (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { 345 (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
@@ -634,6 +625,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
634 struct sta_info *sta, 625 struct sta_info *sta,
635 struct station_parameters *params) 626 struct station_parameters *params)
636{ 627{
628 unsigned long flags;
637 u32 rates; 629 u32 rates;
638 int i, j; 630 int i, j;
639 struct ieee80211_supported_band *sband; 631 struct ieee80211_supported_band *sband;
@@ -642,7 +634,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
642 634
643 sband = local->hw.wiphy->bands[local->oper_channel->band]; 635 sband = local->hw.wiphy->bands[local->oper_channel->band];
644 636
645 spin_lock_bh(&sta->lock); 637 spin_lock_irqsave(&sta->flaglock, flags);
646 mask = params->sta_flags_mask; 638 mask = params->sta_flags_mask;
647 set = params->sta_flags_set; 639 set = params->sta_flags_set;
648 640
@@ -669,7 +661,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
669 if (set & BIT(NL80211_STA_FLAG_MFP)) 661 if (set & BIT(NL80211_STA_FLAG_MFP))
670 sta->flags |= WLAN_STA_MFP; 662 sta->flags |= WLAN_STA_MFP;
671 } 663 }
672 spin_unlock_bh(&sta->lock); 664 spin_unlock_irqrestore(&sta->flaglock, flags);
673 665
674 /* 666 /*
675 * cfg80211 validates this (1-2007) and allows setting the AID 667 * cfg80211 validates this (1-2007) and allows setting the AID
@@ -1143,9 +1135,9 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
1143 p.uapsd = false; 1135 p.uapsd = false;
1144 1136
1145 if (drv_conf_tx(local, params->queue, &p)) { 1137 if (drv_conf_tx(local, params->queue, &p)) {
1146 printk(KERN_DEBUG "%s: failed to set TX queue " 1138 wiphy_debug(local->hw.wiphy,
1147 "parameters for queue %d\n", 1139 "failed to set TX queue parameters for queue %d\n",
1148 wiphy_name(local->hw.wiphy), params->queue); 1140 params->queue);
1149 return -EINVAL; 1141 return -EINVAL;
1150 } 1142 }
1151 1143
@@ -1207,15 +1199,26 @@ static int ieee80211_scan(struct wiphy *wiphy,
1207 struct net_device *dev, 1199 struct net_device *dev,
1208 struct cfg80211_scan_request *req) 1200 struct cfg80211_scan_request *req)
1209{ 1201{
1210 struct ieee80211_sub_if_data *sdata; 1202 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1211
1212 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1213 1203
1214 if (sdata->vif.type != NL80211_IFTYPE_STATION && 1204 switch (ieee80211_vif_type_p2p(&sdata->vif)) {
1215 sdata->vif.type != NL80211_IFTYPE_ADHOC && 1205 case NL80211_IFTYPE_STATION:
1216 sdata->vif.type != NL80211_IFTYPE_MESH_POINT && 1206 case NL80211_IFTYPE_ADHOC:
1217 (sdata->vif.type != NL80211_IFTYPE_AP || sdata->u.ap.beacon)) 1207 case NL80211_IFTYPE_MESH_POINT:
1208 case NL80211_IFTYPE_P2P_CLIENT:
1209 break;
1210 case NL80211_IFTYPE_P2P_GO:
1211 if (sdata->local->ops->hw_scan)
1212 break;
1213 /* FIXME: implement NoA while scanning in software */
1214 return -EOPNOTSUPP;
1215 case NL80211_IFTYPE_AP:
1216 if (sdata->u.ap.beacon)
1217 return -EOPNOTSUPP;
1218 break;
1219 default:
1218 return -EOPNOTSUPP; 1220 return -EOPNOTSUPP;
1221 }
1219 1222
1220 return ieee80211_request_scan(sdata, req); 1223 return ieee80211_request_scan(sdata, req);
1221} 1224}
@@ -1362,7 +1365,7 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm)
1362} 1365}
1363 1366
1364static int ieee80211_set_wds_peer(struct wiphy *wiphy, struct net_device *dev, 1367static int ieee80211_set_wds_peer(struct wiphy *wiphy, struct net_device *dev,
1365 u8 *addr) 1368 const u8 *addr)
1366{ 1369{
1367 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1370 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1368 1371
@@ -1411,7 +1414,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
1411 if (!sdata->u.mgd.associated || 1414 if (!sdata->u.mgd.associated ||
1412 sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) { 1415 sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) {
1413 mutex_lock(&sdata->local->iflist_mtx); 1416 mutex_lock(&sdata->local->iflist_mtx);
1414 ieee80211_recalc_smps(sdata->local, sdata); 1417 ieee80211_recalc_smps(sdata->local);
1415 mutex_unlock(&sdata->local->iflist_mtx); 1418 mutex_unlock(&sdata->local->iflist_mtx);
1416 return 0; 1419 return 0;
1417 } 1420 }
@@ -1541,11 +1544,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
1541 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); 1544 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
1542} 1545}
1543 1546
1544static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, 1547static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
1545 struct ieee80211_channel *chan, 1548 struct ieee80211_channel *chan,
1546 enum nl80211_channel_type channel_type, 1549 enum nl80211_channel_type channel_type,
1547 bool channel_type_valid, 1550 bool channel_type_valid,
1548 const u8 *buf, size_t len, u64 *cookie) 1551 const u8 *buf, size_t len, u64 *cookie)
1549{ 1552{
1550 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1553 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1551 struct ieee80211_local *local = sdata->local; 1554 struct ieee80211_local *local = sdata->local;
@@ -1566,7 +1569,11 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1566 1569
1567 switch (sdata->vif.type) { 1570 switch (sdata->vif.type) {
1568 case NL80211_IFTYPE_ADHOC: 1571 case NL80211_IFTYPE_ADHOC:
1569 if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) 1572 case NL80211_IFTYPE_AP:
1573 case NL80211_IFTYPE_AP_VLAN:
1574 case NL80211_IFTYPE_P2P_GO:
1575 if (!ieee80211_is_action(mgmt->frame_control) ||
1576 mgmt->u.action.category == WLAN_CATEGORY_PUBLIC)
1570 break; 1577 break;
1571 rcu_read_lock(); 1578 rcu_read_lock();
1572 sta = sta_info_get(sdata, mgmt->da); 1579 sta = sta_info_get(sdata, mgmt->da);
@@ -1575,8 +1582,7 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1575 return -ENOLINK; 1582 return -ENOLINK;
1576 break; 1583 break;
1577 case NL80211_IFTYPE_STATION: 1584 case NL80211_IFTYPE_STATION:
1578 if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED)) 1585 case NL80211_IFTYPE_P2P_CLIENT:
1579 flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1580 break; 1586 break;
1581 default: 1587 default:
1582 return -EOPNOTSUPP; 1588 return -EOPNOTSUPP;
@@ -1598,6 +1604,23 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1598 return 0; 1604 return 0;
1599} 1605}
1600 1606
1607static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
1608 struct net_device *dev,
1609 u16 frame_type, bool reg)
1610{
1611 struct ieee80211_local *local = wiphy_priv(wiphy);
1612
1613 if (frame_type != (IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ))
1614 return;
1615
1616 if (reg)
1617 local->probe_req_reg++;
1618 else
1619 local->probe_req_reg--;
1620
1621 ieee80211_queue_work(&local->hw, &local->reconfig_filter);
1622}
1623
1601struct cfg80211_ops mac80211_config_ops = { 1624struct cfg80211_ops mac80211_config_ops = {
1602 .add_virtual_intf = ieee80211_add_iface, 1625 .add_virtual_intf = ieee80211_add_iface,
1603 .del_virtual_intf = ieee80211_del_iface, 1626 .del_virtual_intf = ieee80211_del_iface,
@@ -1647,6 +1670,7 @@ struct cfg80211_ops mac80211_config_ops = {
1647 .set_bitrate_mask = ieee80211_set_bitrate_mask, 1670 .set_bitrate_mask = ieee80211_set_bitrate_mask,
1648 .remain_on_channel = ieee80211_remain_on_channel, 1671 .remain_on_channel = ieee80211_remain_on_channel,
1649 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, 1672 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
1650 .action = ieee80211_action, 1673 .mgmt_tx = ieee80211_mgmt_tx,
1651 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, 1674 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
1675 .mgmt_frame_register = ieee80211_mgmt_frame_register,
1652}; 1676};
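The new ieee80211_mgmt_frame_register() above is deliberately minimal: it only counts probe-request listeners in local->probe_req_reg and queues the reconfig_filter work, deferring the actual RX filter reprogramming to process context where it may sleep. The counted-listener shape, generically (names invented for this sketch, not mac80211's):

struct listener_state {
	int probe_req_listeners;	/* protected by the caller's lock */
	struct work_struct reconfig_work;
};

static void listener_register(struct listener_state *st, bool reg)
{
	if (reg)
		st->probe_req_listeners++;
	else
		st->probe_req_listeners--;

	/* the hardware filter update may sleep; defer it to a work item */
	schedule_work(&st->reconfig_work);
}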
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 32be11e4c4d9..5b24740fc0b0 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -11,7 +11,7 @@ __ieee80211_get_channel_mode(struct ieee80211_local *local,
11{ 11{
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
13 13
14 WARN_ON(!mutex_is_locked(&local->iflist_mtx)); 14 lockdep_assert_held(&local->iflist_mtx);
15 15
16 list_for_each_entry(sdata, &local->interfaces, list) { 16 list_for_each_entry(sdata, &local->interfaces, list) {
17 if (sdata == ignore) 17 if (sdata == ignore)
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index a694c593ff6a..18260aa99c56 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -36,6 +36,7 @@ static ssize_t name## _read(struct file *file, char __user *userbuf, \
36static const struct file_operations name## _ops = { \ 36static const struct file_operations name## _ops = { \
37 .read = name## _read, \ 37 .read = name## _read, \
38 .open = mac80211_open_file_generic, \ 38 .open = mac80211_open_file_generic, \
39 .llseek = generic_file_llseek, \
39}; 40};
40 41
41#define DEBUGFS_ADD(name) \ 42#define DEBUGFS_ADD(name) \
@@ -85,13 +86,15 @@ static ssize_t tsf_write(struct file *file,
85 if (strncmp(buf, "reset", 5) == 0) { 86 if (strncmp(buf, "reset", 5) == 0) {
86 if (local->ops->reset_tsf) { 87 if (local->ops->reset_tsf) {
87 drv_reset_tsf(local); 88 drv_reset_tsf(local);
88 printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy)); 89 wiphy_info(local->hw.wiphy, "debugfs reset TSF\n");
89 } 90 }
90 } else { 91 } else {
91 tsf = simple_strtoul(buf, NULL, 0); 92 tsf = simple_strtoul(buf, NULL, 0);
92 if (local->ops->set_tsf) { 93 if (local->ops->set_tsf) {
93 drv_set_tsf(local, tsf); 94 drv_set_tsf(local, tsf);
94 printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf); 95 wiphy_info(local->hw.wiphy,
96 "debugfs set TSF to %#018llx\n", tsf);
97
95 } 98 }
96 } 99 }
97 100
@@ -101,7 +104,8 @@ static ssize_t tsf_write(struct file *file,
101static const struct file_operations tsf_ops = { 104static const struct file_operations tsf_ops = {
102 .read = tsf_read, 105 .read = tsf_read,
103 .write = tsf_write, 106 .write = tsf_write,
104 .open = mac80211_open_file_generic 107 .open = mac80211_open_file_generic,
108 .llseek = default_llseek,
105}; 109};
106 110
107static ssize_t reset_write(struct file *file, const char __user *user_buf, 111static ssize_t reset_write(struct file *file, const char __user *user_buf,
@@ -120,6 +124,7 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
120static const struct file_operations reset_ops = { 124static const struct file_operations reset_ops = {
121 .write = reset_write, 125 .write = reset_write,
122 .open = mac80211_open_file_generic, 126 .open = mac80211_open_file_generic,
127 .llseek = noop_llseek,
123}; 128};
124 129
125static ssize_t noack_read(struct file *file, char __user *user_buf, 130static ssize_t noack_read(struct file *file, char __user *user_buf,
@@ -155,7 +160,8 @@ static ssize_t noack_write(struct file *file,
155static const struct file_operations noack_ops = { 160static const struct file_operations noack_ops = {
156 .read = noack_read, 161 .read = noack_read,
157 .write = noack_write, 162 .write = noack_write,
158 .open = mac80211_open_file_generic 163 .open = mac80211_open_file_generic,
164 .llseek = default_llseek,
159}; 165};
160 166
161static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf, 167static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf,
@@ -201,7 +207,8 @@ static ssize_t uapsd_queues_write(struct file *file,
201static const struct file_operations uapsd_queues_ops = { 207static const struct file_operations uapsd_queues_ops = {
202 .read = uapsd_queues_read, 208 .read = uapsd_queues_read,
203 .write = uapsd_queues_write, 209 .write = uapsd_queues_write,
204 .open = mac80211_open_file_generic 210 .open = mac80211_open_file_generic,
211 .llseek = default_llseek,
205}; 212};
206 213
207static ssize_t uapsd_max_sp_len_read(struct file *file, char __user *user_buf, 214static ssize_t uapsd_max_sp_len_read(struct file *file, char __user *user_buf,
@@ -247,7 +254,8 @@ static ssize_t uapsd_max_sp_len_write(struct file *file,
247static const struct file_operations uapsd_max_sp_len_ops = { 254static const struct file_operations uapsd_max_sp_len_ops = {
248 .read = uapsd_max_sp_len_read, 255 .read = uapsd_max_sp_len_read,
249 .write = uapsd_max_sp_len_write, 256 .write = uapsd_max_sp_len_write,
250 .open = mac80211_open_file_generic 257 .open = mac80211_open_file_generic,
258 .llseek = default_llseek,
251}; 259};
252 260
253static ssize_t channel_type_read(struct file *file, char __user *user_buf, 261static ssize_t channel_type_read(struct file *file, char __user *user_buf,
@@ -279,7 +287,8 @@ static ssize_t channel_type_read(struct file *file, char __user *user_buf,
279 287
280static const struct file_operations channel_type_ops = { 288static const struct file_operations channel_type_ops = {
281 .read = channel_type_read, 289 .read = channel_type_read,
282 .open = mac80211_open_file_generic 290 .open = mac80211_open_file_generic,
291 .llseek = default_llseek,
283}; 292};
284 293
285static ssize_t queues_read(struct file *file, char __user *user_buf, 294static ssize_t queues_read(struct file *file, char __user *user_buf,
@@ -302,7 +311,8 @@ static ssize_t queues_read(struct file *file, char __user *user_buf,
302 311
303static const struct file_operations queues_ops = { 312static const struct file_operations queues_ops = {
304 .read = queues_read, 313 .read = queues_read,
305 .open = mac80211_open_file_generic 314 .open = mac80211_open_file_generic,
315 .llseek = default_llseek,
306}; 316};
307 317
308/* statistics stuff */ 318/* statistics stuff */
@@ -346,6 +356,7 @@ static ssize_t stats_ ##name## _read(struct file *file, \
346static const struct file_operations stats_ ##name## _ops = { \ 356static const struct file_operations stats_ ##name## _ops = { \
347 .read = stats_ ##name## _read, \ 357 .read = stats_ ##name## _read, \
348 .open = mac80211_open_file_generic, \ 358 .open = mac80211_open_file_generic, \
359 .llseek = generic_file_llseek, \
349}; 360};
350 361
351#define DEBUGFS_STATS_ADD(name, field) \ 362#define DEBUGFS_STATS_ADD(name, field) \
@@ -366,7 +377,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
366 if (!phyd) 377 if (!phyd)
367 return; 378 return;
368 379
369 local->debugfs.stations = debugfs_create_dir("stations", phyd);
370 local->debugfs.keys = debugfs_create_dir("keys", phyd); 380 local->debugfs.keys = debugfs_create_dir("keys", phyd);
371 381
372 DEBUGFS_ADD(frequency); 382 DEBUGFS_ADD(frequency);
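The .llseek additions scattered through these debugfs hunks look mechanical because they are: this appears to be part of the tree-wide "llseek: automatically add .llseek fop" cleanup, which made every file_operations state its seek behaviour explicitly instead of inheriting the old default. The pattern here is generic_file_llseek for readable stat files, default_llseek for read/write files, and noop_llseek for write-only triggers such as reset. A complete instance after the change (read/open callbacks as defined above):

static const struct file_operations example_stat_ops = {
	.read	= example_read,
	.open	= mac80211_open_file_generic,
	.llseek	= generic_file_llseek,	/* seekable read-only stat file */
};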
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index fa5e76e658ef..1243d1db5c59 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -32,6 +32,7 @@ static ssize_t key_##name##_read(struct file *file, \
32static const struct file_operations key_ ##name## _ops = { \ 32static const struct file_operations key_ ##name## _ops = { \
33 .read = key_##name##_read, \ 33 .read = key_##name##_read, \
34 .open = mac80211_open_file_generic, \ 34 .open = mac80211_open_file_generic, \
35 .llseek = generic_file_llseek, \
35} 36}
36 37
37#define KEY_FILE(name, format) \ 38#define KEY_FILE(name, format) \
@@ -46,6 +47,7 @@ static const struct file_operations key_ ##name## _ops = { \
46static const struct file_operations key_ ##name## _ops = { \ 47static const struct file_operations key_ ##name## _ops = { \
47 .read = key_conf_##name##_read, \ 48 .read = key_conf_##name##_read, \
48 .open = mac80211_open_file_generic, \ 49 .open = mac80211_open_file_generic, \
50 .llseek = generic_file_llseek, \
49} 51}
50 52
51#define KEY_CONF_FILE(name, format) \ 53#define KEY_CONF_FILE(name, format) \
@@ -64,26 +66,13 @@ static ssize_t key_algorithm_read(struct file *file,
64 char __user *userbuf, 66 char __user *userbuf,
65 size_t count, loff_t *ppos) 67 size_t count, loff_t *ppos)
66{ 68{
67 char *alg; 69 char buf[15];
68 struct ieee80211_key *key = file->private_data; 70 struct ieee80211_key *key = file->private_data;
71 u32 c = key->conf.cipher;
69 72
70 switch (key->conf.alg) { 73 sprintf(buf, "%.2x-%.2x-%.2x:%d\n",
71 case ALG_WEP: 74 c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff);
72 alg = "WEP\n"; 75 return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
73 break;
74 case ALG_TKIP:
75 alg = "TKIP\n";
76 break;
77 case ALG_CCMP:
78 alg = "CCMP\n";
79 break;
80 case ALG_AES_CMAC:
81 alg = "AES-128-CMAC\n";
82 break;
83 default:
84 return 0;
85 }
86 return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg));
87} 76}
88KEY_OPS(algorithm); 77KEY_OPS(algorithm);
89 78
@@ -95,21 +84,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
95 int len; 84 int len;
96 struct ieee80211_key *key = file->private_data; 85 struct ieee80211_key *key = file->private_data;
97 86
98 switch (key->conf.alg) { 87 switch (key->conf.cipher) {
99 case ALG_WEP: 88 case WLAN_CIPHER_SUITE_WEP40:
89 case WLAN_CIPHER_SUITE_WEP104:
100 len = scnprintf(buf, sizeof(buf), "\n"); 90 len = scnprintf(buf, sizeof(buf), "\n");
101 break; 91 break;
102 case ALG_TKIP: 92 case WLAN_CIPHER_SUITE_TKIP:
103 len = scnprintf(buf, sizeof(buf), "%08x %04x\n", 93 len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
104 key->u.tkip.tx.iv32, 94 key->u.tkip.tx.iv32,
105 key->u.tkip.tx.iv16); 95 key->u.tkip.tx.iv16);
106 break; 96 break;
107 case ALG_CCMP: 97 case WLAN_CIPHER_SUITE_CCMP:
108 tpn = key->u.ccmp.tx_pn; 98 tpn = key->u.ccmp.tx_pn;
109 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 99 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
110 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); 100 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
111 break; 101 break;
112 case ALG_AES_CMAC: 102 case WLAN_CIPHER_SUITE_AES_CMAC:
113 tpn = key->u.aes_cmac.tx_pn; 103 tpn = key->u.aes_cmac.tx_pn;
114 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 104 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
115 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], 105 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
@@ -130,11 +120,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
130 int i, len; 120 int i, len;
131 const u8 *rpn; 121 const u8 *rpn;
132 122
133 switch (key->conf.alg) { 123 switch (key->conf.cipher) {
134 case ALG_WEP: 124 case WLAN_CIPHER_SUITE_WEP40:
125 case WLAN_CIPHER_SUITE_WEP104:
135 len = scnprintf(buf, sizeof(buf), "\n"); 126 len = scnprintf(buf, sizeof(buf), "\n");
136 break; 127 break;
137 case ALG_TKIP: 128 case WLAN_CIPHER_SUITE_TKIP:
138 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 129 for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
139 p += scnprintf(p, sizeof(buf)+buf-p, 130 p += scnprintf(p, sizeof(buf)+buf-p,
140 "%08x %04x\n", 131 "%08x %04x\n",
@@ -142,7 +133,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
142 key->u.tkip.rx[i].iv16); 133 key->u.tkip.rx[i].iv16);
143 len = p - buf; 134 len = p - buf;
144 break; 135 break;
145 case ALG_CCMP: 136 case WLAN_CIPHER_SUITE_CCMP:
146 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { 137 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) {
147 rpn = key->u.ccmp.rx_pn[i]; 138 rpn = key->u.ccmp.rx_pn[i];
148 p += scnprintf(p, sizeof(buf)+buf-p, 139 p += scnprintf(p, sizeof(buf)+buf-p,
@@ -152,7 +143,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
152 } 143 }
153 len = p - buf; 144 len = p - buf;
154 break; 145 break;
155 case ALG_AES_CMAC: 146 case WLAN_CIPHER_SUITE_AES_CMAC:
156 rpn = key->u.aes_cmac.rx_pn; 147 rpn = key->u.aes_cmac.rx_pn;
157 p += scnprintf(p, sizeof(buf)+buf-p, 148 p += scnprintf(p, sizeof(buf)+buf-p,
158 "%02x%02x%02x%02x%02x%02x\n", 149 "%02x%02x%02x%02x%02x%02x\n",
@@ -174,11 +165,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
174 char buf[20]; 165 char buf[20];
175 int len; 166 int len;
176 167
177 switch (key->conf.alg) { 168 switch (key->conf.cipher) {
178 case ALG_CCMP: 169 case WLAN_CIPHER_SUITE_CCMP:
179 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); 170 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
180 break; 171 break;
181 case ALG_AES_CMAC: 172 case WLAN_CIPHER_SUITE_AES_CMAC:
182 len = scnprintf(buf, sizeof(buf), "%u\n", 173 len = scnprintf(buf, sizeof(buf), "%u\n",
183 key->u.aes_cmac.replays); 174 key->u.aes_cmac.replays);
184 break; 175 break;
@@ -196,8 +187,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
196 char buf[20]; 187 char buf[20];
197 int len; 188 int len;
198 189
199 switch (key->conf.alg) { 190 switch (key->conf.cipher) {
200 case ALG_AES_CMAC: 191 case WLAN_CIPHER_SUITE_AES_CMAC:
201 len = scnprintf(buf, sizeof(buf), "%u\n", 192 len = scnprintf(buf, sizeof(buf), "%u\n",
202 key->u.aes_cmac.icverrors); 193 key->u.aes_cmac.icverrors);
203 break; 194 break;
@@ -212,9 +203,13 @@ static ssize_t key_key_read(struct file *file, char __user *userbuf,
212 size_t count, loff_t *ppos) 203 size_t count, loff_t *ppos)
213{ 204{
214 struct ieee80211_key *key = file->private_data; 205 struct ieee80211_key *key = file->private_data;
215 int i, res, bufsize = 2 * key->conf.keylen + 2; 206 int i, bufsize = 2 * key->conf.keylen + 2;
216 char *buf = kmalloc(bufsize, GFP_KERNEL); 207 char *buf = kmalloc(bufsize, GFP_KERNEL);
217 char *p = buf; 208 char *p = buf;
209 ssize_t res;
210
211 if (!buf)
212 return -ENOMEM;
218 213
219 for (i = 0; i < key->conf.keylen; i++) 214 for (i = 0; i < key->conf.keylen; i++)
220 p += scnprintf(p, bufsize + buf - p, "%02x", key->conf.key[i]); 215 p += scnprintf(p, bufsize + buf - p, "%02x", key->conf.key[i]);
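Two fixes land in the key_key_read() hunk above: the kmalloc() result is finally checked before the hex-dump loop writes through it, and res widens from int to ssize_t to match simple_read_from_buffer()'s return type. The fixed shape, condensed (fill_key_hex() is a hypothetical stand-in for the scnprintf() loop):

static ssize_t example_key_read(struct file *file, char __user *userbuf,
				size_t count, loff_t *ppos)
{
	int bufsize = 2 * 16 + 2;	/* hex dump of a 16-byte key */
	char *buf = kmalloc(bufsize, GFP_KERNEL);
	ssize_t res;

	if (!buf)
		return -ENOMEM;

	res = simple_read_from_buffer(userbuf, count, ppos, buf,
				      fill_key_hex(buf, bufsize));
	kfree(buf);
	return res;
}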
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 20b2998fa0ed..cbdf36d7841c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -121,6 +121,7 @@ static const struct file_operations name##_ops = { \
121 .read = ieee80211_if_read_##name, \ 121 .read = ieee80211_if_read_##name, \
122 .write = (_write), \ 122 .write = (_write), \
123 .open = mac80211_open_file_generic, \ 123 .open = mac80211_open_file_generic, \
124 .llseek = generic_file_llseek, \
124} 125}
125 126
126#define __IEEE80211_IF_FILE_W(name) \ 127#define __IEEE80211_IF_FILE_W(name) \
@@ -409,6 +410,9 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
409 sprintf(buf, "netdev:%s", sdata->name); 410 sprintf(buf, "netdev:%s", sdata->name);
410 sdata->debugfs.dir = debugfs_create_dir(buf, 411 sdata->debugfs.dir = debugfs_create_dir(buf,
411 sdata->local->hw.wiphy->debugfsdir); 412 sdata->local->hw.wiphy->debugfsdir);
413 if (sdata->debugfs.dir)
414 sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
415 sdata->debugfs.dir);
412 add_files(sdata); 416 add_files(sdata);
413} 417}
414 418
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 76839d4dfaac..4601fea1784d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -36,6 +36,7 @@ static ssize_t sta_ ##name## _read(struct file *file, \
36static const struct file_operations sta_ ##name## _ops = { \ 36static const struct file_operations sta_ ##name## _ops = { \
37 .read = sta_##name##_read, \ 37 .read = sta_##name##_read, \
38 .open = mac80211_open_file_generic, \ 38 .open = mac80211_open_file_generic, \
39 .llseek = generic_file_llseek, \
39} 40}
40 41
41#define STA_OPS_RW(name) \ 42#define STA_OPS_RW(name) \
@@ -43,6 +44,7 @@ static const struct file_operations sta_ ##name## _ops = { \
43 .read = sta_##name##_read, \ 44 .read = sta_##name##_read, \
44 .write = sta_##name##_write, \ 45 .write = sta_##name##_write, \
45 .open = mac80211_open_file_generic, \ 46 .open = mac80211_open_file_generic, \
47 .llseek = generic_file_llseek, \
46} 48}
47 49
48#define STA_FILE(name, field, format) \ 50#define STA_FILE(name, field, format) \
@@ -196,7 +198,8 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
196 else 198 else
197 ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); 199 ret = ieee80211_stop_tx_ba_session(&sta->sta, tid);
198 } else { 200 } else {
199 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); 201 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
202 3, true);
200 ret = 0; 203 ret = 0;
201 } 204 }
202 205
@@ -300,7 +303,7 @@ STA_OPS(ht_capa);
300 303
301void ieee80211_sta_debugfs_add(struct sta_info *sta) 304void ieee80211_sta_debugfs_add(struct sta_info *sta)
302{ 305{
303 struct dentry *stations_dir = sta->local->debugfs.stations; 306 struct dentry *stations_dir = sta->sdata->debugfs.subdir_stations;
304 u8 mac[3*ETH_ALEN]; 307 u8 mac[3*ETH_ALEN];
305 308
306 sta->debugfs.add_has_run = true; 309 sta->debugfs.add_has_run = true;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 14123dce544b..16983825f8e8 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int drv_change_interface(struct ieee80211_local *local,
58 struct ieee80211_sub_if_data *sdata,
59 enum nl80211_iftype type, bool p2p)
60{
61 int ret;
62
63 might_sleep();
64
65 trace_drv_change_interface(local, sdata, type, p2p);
66 ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
67 trace_drv_return_int(local, ret);
68 return ret;
69}
70
57static inline void drv_remove_interface(struct ieee80211_local *local, 71static inline void drv_remove_interface(struct ieee80211_local *local,
58 struct ieee80211_vif *vif) 72 struct ieee80211_vif *vif)
59{ 73{
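drv_change_interface() above follows the driver-ops wrapper convention used throughout this header: assert sleepability with might_sleep(), emit a tracepoint with the arguments, call the driver op, trace the return value. On the driver side the op reprograms the interface type in hardware; a sketch of an implementation (only the signature comes from the hunk, the callback and helper names are assumptions):

static int mydrv_change_interface(struct ieee80211_hw *hw,
				  struct ieee80211_vif *vif,
				  enum nl80211_iftype new_type,
				  bool p2p)
{
	struct mydrv_priv *priv = hw->priv;

	/* may sleep: called under the wrapper's might_sleep() */
	return mydrv_set_iftype(priv, vif, new_type, p2p);
}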
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 5d5d2a974668..6831fb1641c8 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -25,12 +25,14 @@ static inline void trace_ ## name(proto) {}
25#define STA_PR_FMT " sta:%pM" 25#define STA_PR_FMT " sta:%pM"
26#define STA_PR_ARG __entry->sta_addr 26#define STA_PR_ARG __entry->sta_addr
27 27
28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ 28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
29 __field(bool, p2p) \
29 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 30 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
30#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ 31#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
32 __entry->p2p = sdata->vif.p2p; \
31 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 33 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
32#define VIF_PR_FMT " vif:%s(%d)" 34#define VIF_PR_FMT " vif:%s(%d%s)"
33#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type 35#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
34 36
35/* 37/*
36 * Tracing for driver callbacks. 38 * Tracing for driver callbacks.
@@ -136,6 +138,34 @@ TRACE_EVENT(drv_add_interface,
136 ) 138 )
137); 139);
138 140
141TRACE_EVENT(drv_change_interface,
142 TP_PROTO(struct ieee80211_local *local,
143 struct ieee80211_sub_if_data *sdata,
144 enum nl80211_iftype type, bool p2p),
145
146 TP_ARGS(local, sdata, type, p2p),
147
148 TP_STRUCT__entry(
149 LOCAL_ENTRY
150 VIF_ENTRY
151 __field(u32, new_type)
152 __field(bool, new_p2p)
153 ),
154
155 TP_fast_assign(
156 LOCAL_ASSIGN;
157 VIF_ASSIGN;
158 __entry->new_type = type;
159 __entry->new_p2p = p2p;
160 ),
161
162 TP_printk(
163 LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s",
164 LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type,
165 __entry->new_p2p ? "/p2p" : ""
166 )
167);
168
139TRACE_EVENT(drv_remove_interface, 169TRACE_EVENT(drv_remove_interface,
140 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), 170 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata),
141 171
@@ -336,7 +366,7 @@ TRACE_EVENT(drv_set_key,
336 LOCAL_ENTRY 366 LOCAL_ENTRY
337 VIF_ENTRY 367 VIF_ENTRY
338 STA_ENTRY 368 STA_ENTRY
339 __field(enum ieee80211_key_alg, alg) 369 __field(u32, cipher)
340 __field(u8, hw_key_idx) 370 __field(u8, hw_key_idx)
341 __field(u8, flags) 371 __field(u8, flags)
342 __field(s8, keyidx) 372 __field(s8, keyidx)
@@ -346,7 +376,7 @@ TRACE_EVENT(drv_set_key,
346 LOCAL_ASSIGN; 376 LOCAL_ASSIGN;
347 VIF_ASSIGN; 377 VIF_ASSIGN;
348 STA_ASSIGN; 378 STA_ASSIGN;
349 __entry->alg = key->alg; 379 __entry->cipher = key->cipher;
350 __entry->flags = key->flags; 380 __entry->flags = key->flags;
351 __entry->keyidx = key->keyidx; 381 __entry->keyidx = key->keyidx;
352 __entry->hw_key_idx = key->hw_key_idx; 382 __entry->hw_key_idx = key->hw_key_idx;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 9d101fb33861..75d679d75e63 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -101,16 +101,16 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
101 ht_cap->mcs.rx_mask[32/8] |= 1; 101 ht_cap->mcs.rx_mask[32/8] |= 1;
102} 102}
103 103
104void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) 104void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx)
105{ 105{
106 int i; 106 int i;
107 107
108 cancel_work_sync(&sta->ampdu_mlme.work); 108 cancel_work_sync(&sta->ampdu_mlme.work);
109 109
110 for (i = 0; i < STA_TID_NUM; i++) { 110 for (i = 0; i < STA_TID_NUM; i++) {
111 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR); 111 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR, tx);
112 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 112 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
113 WLAN_REASON_QSTA_LEAVE_QBSS); 113 WLAN_REASON_QSTA_LEAVE_QBSS, tx);
114 } 114 }
115} 115}
116 116
@@ -135,7 +135,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
135 if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) 135 if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired))
136 ___ieee80211_stop_rx_ba_session( 136 ___ieee80211_stop_rx_ba_session(
137 sta, tid, WLAN_BACK_RECIPIENT, 137 sta, tid, WLAN_BACK_RECIPIENT,
138 WLAN_REASON_QSTA_TIMEOUT); 138 WLAN_REASON_QSTA_TIMEOUT, true);
139 139
140 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 140 tid_tx = sta->ampdu_mlme.tid_tx[tid];
141 if (!tid_tx) 141 if (!tid_tx)
@@ -146,7 +146,8 @@ void ieee80211_ba_session_work(struct work_struct *work)
146 else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, 146 else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
147 &tid_tx->state)) 147 &tid_tx->state))
148 ___ieee80211_stop_tx_ba_session(sta, tid, 148 ___ieee80211_stop_tx_ba_session(sta, tid,
149 WLAN_BACK_INITIATOR); 149 WLAN_BACK_INITIATOR,
150 true);
150 } 151 }
151 mutex_unlock(&sta->ampdu_mlme.mtx); 152 mutex_unlock(&sta->ampdu_mlme.mtx);
152} 153}
@@ -214,9 +215,11 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
214#endif /* CONFIG_MAC80211_HT_DEBUG */ 215#endif /* CONFIG_MAC80211_HT_DEBUG */
215 216
216 if (initiator == WLAN_BACK_INITIATOR) 217 if (initiator == WLAN_BACK_INITIATOR)
217 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0); 218 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0,
219 true);
218 else 220 else
219 __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT); 221 __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
222 true);
220} 223}
221 224
222int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, 225int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
@@ -265,3 +268,33 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
265 268
266 return 0; 269 return 0;
267} 270}
271
272void ieee80211_request_smps_work(struct work_struct *work)
273{
274 struct ieee80211_sub_if_data *sdata =
275 container_of(work, struct ieee80211_sub_if_data,
276 u.mgd.request_smps_work);
277
278 mutex_lock(&sdata->u.mgd.mtx);
279 __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode);
280 mutex_unlock(&sdata->u.mgd.mtx);
281}
282
283void ieee80211_request_smps(struct ieee80211_vif *vif,
284 enum ieee80211_smps_mode smps_mode)
285{
286 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
287
288 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
289 return;
290
291 if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF))
292 smps_mode = IEEE80211_SMPS_AUTOMATIC;
293
294 sdata->u.mgd.driver_smps_mode = smps_mode;
295
296 ieee80211_queue_work(&sdata->local->hw,
297 &sdata->u.mgd.request_smps_work);
298}
299/* this might change ... don't want non-open drivers using it */
300EXPORT_SYMBOL_GPL(ieee80211_request_smps);
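The newly exported ieee80211_request_smps() lets a driver request an SMPS (spatial multiplexing power save) mode change without worrying about locking context: it only records the desired mode and queues request_smps_work, which takes the managed-mode mutex before acting. The WARN_ONs filter misuse, station interfaces only, and OFF is coerced to AUTOMATIC. A hedged driver-side call (the surrounding driver function is assumed):

static void mydrv_enter_powersave(struct ieee80211_vif *vif)
{
	/* vif must be a station interface, per the WARN_ON above */
	ieee80211_request_smps(vif, IEEE80211_SMPS_STATIC);
}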
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index c691780725a7..239c4836a946 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -173,6 +173,19 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 		memcpy(skb_put(skb, ifibss->ie_len),
 		       ifibss->ie, ifibss->ie_len);
 
+	if (local->hw.queues >= 4) {
+		pos = skb_put(skb, 9);
+		*pos++ = WLAN_EID_VENDOR_SPECIFIC;
+		*pos++ = 7; /* len */
+		*pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
+		*pos++ = 0x50;
+		*pos++ = 0xf2;
+		*pos++ = 2; /* WME */
+		*pos++ = 0; /* WME info */
+		*pos++ = 1; /* WME ver */
+		*pos++ = 0; /* U-APSD no in use */
+	}
+
 	rcu_assign_pointer(ifibss->presp, skb);
 
 	sdata->vif.bss_conf.beacon_int = beacon_int;
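
The added block advertises WMM in IBSS probe responses whenever the hardware has at least four queues (one per access category). It emits the fixed 9-byte vendor-specific WMM information element; the same layout, expressed as a static table for reference (values taken from the bytes built above):

	/* sketch: the 9-byte WMM information element built above */
	static const u8 wmm_info_ie[9] = {
		WLAN_EID_VENDOR_SPECIFIC,	/* element ID 221 */
		7,				/* length */
		0x00, 0x50, 0xf2,		/* Microsoft OUI */
		2,				/* OUI type: WMM */
		0,				/* OUI subtype: info element */
		1,				/* WMM version */
		0,				/* QoS info: no U-APSD */
	};
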
@@ -266,37 +279,45 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
 		return;
 
-	if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates &&
+	if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
 	    memcmp(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) {
-		supp_rates = ieee80211_sta_get_rates(local, elems, band);
 
 		rcu_read_lock();
-
 		sta = sta_info_get(sdata, mgmt->sa);
-		if (sta) {
-			u32 prev_rates;
 
-			prev_rates = sta->sta.supp_rates[band];
-			/* make sure mandatory rates are always added */
-			sta->sta.supp_rates[band] = supp_rates |
-				ieee80211_mandatory_rates(local, band);
+		if (elems->supp_rates) {
+			supp_rates = ieee80211_sta_get_rates(local, elems,
+							     band);
+			if (sta) {
+				u32 prev_rates;
+
+				prev_rates = sta->sta.supp_rates[band];
+				/* make sure mandatory rates are always added */
+				sta->sta.supp_rates[band] = supp_rates |
+					ieee80211_mandatory_rates(local, band);
 
-			if (sta->sta.supp_rates[band] != prev_rates) {
+				if (sta->sta.supp_rates[band] != prev_rates) {
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
-				printk(KERN_DEBUG "%s: updated supp_rates set "
-					"for %pM based on beacon/probe_response "
-					"(0x%x -> 0x%x)\n",
-					sdata->name, sta->sta.addr,
-					prev_rates, sta->sta.supp_rates[band]);
+					printk(KERN_DEBUG
+					       "%s: updated supp_rates set "
+					       "for %pM based on beacon"
+					       "/probe_resp (0x%x -> 0x%x)\n",
+					       sdata->name, sta->sta.addr,
+					       prev_rates,
+					       sta->sta.supp_rates[band]);
 #endif
-				rate_control_rate_init(sta);
-			}
-			rcu_read_unlock();
-		} else {
-			rcu_read_unlock();
-			ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
-					       supp_rates, GFP_KERNEL);
+					rate_control_rate_init(sta);
+				}
+			} else
+				sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid,
+						mgmt->sa, supp_rates,
+						GFP_ATOMIC);
 		}
+
+		if (sta && elems->wmm_info)
+			set_sta_flags(sta, WLAN_STA_WME);
+
+		rcu_read_unlock();
 	}
 
 	bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
@@ -427,14 +448,15 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n",
-	       wiphy_name(local->hw.wiphy), addr, sdata->name);
+	wiphy_debug(local->hw.wiphy, "Adding new IBSS station %pM (dev=%s)\n",
+		    addr, sdata->name);
 #endif
 
 	sta = sta_info_alloc(sdata, addr, gfp);
 	if (!sta)
 		return NULL;
 
+	sta->last_rx = jiffies;
 	set_sta_flags(sta, WLAN_STA_AUTHORIZED);
 
 	/* make sure mandatory rates are always added */
@@ -920,12 +942,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN);
 	sdata->u.ibss.ssid_len = params->ssid_len;
 
+	mutex_unlock(&sdata->u.ibss.mtx);
+
+	mutex_lock(&sdata->local->mtx);
 	ieee80211_recalc_idle(sdata->local);
+	mutex_unlock(&sdata->local->mtx);
 
 	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 
-	mutex_unlock(&sdata->u.ibss.mtx);
-
 	return 0;
 }
 
@@ -980,7 +1004,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
 
 	mutex_unlock(&sdata->u.ibss.mtx);
 
+	mutex_lock(&local->mtx);
 	ieee80211_recalc_idle(sdata->local);
+	mutex_unlock(&local->mtx);
 
 	return 0;
 }
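
Both ibss_join and ibss_leave now drop u.ibss.mtx before taking local->mtx around the idle recalculation; keeping the two mutexes in a strict order avoids an AB-BA deadlock with other paths that already hold local->mtx. A minimal sketch of the pattern, with `setup_state()` standing in for whatever is done under the inner lock:

	/* sketch: never hold u.ibss.mtx while taking local->mtx;
	 * setup_state() is a hypothetical placeholder */
	mutex_lock(&sdata->u.ibss.mtx);
	setup_state(sdata);
	mutex_unlock(&sdata->u.ibss.mtx);

	mutex_lock(&sdata->local->mtx);
	ieee80211_recalc_idle(sdata->local);	/* asserts local->mtx below */
	mutex_unlock(&sdata->local->mtx);
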
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 65e0ed6c2975..b80c38689927 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
  * increased memory use (about 2 kB of RAM per entry). */
 #define IEEE80211_FRAGMENT_MAX 4
 
-/*
- * Time after which we ignore scan results and no longer report/use
- * them in any way.
- */
-#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
-
 #define TU_TO_EXP_TIME(x)	(jiffies + usecs_to_jiffies((x) * 1024))
 
 #define IEEE80211_DEFAULT_UAPSD_QUEUES \
@@ -165,12 +159,37 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
 #define RX_DROP_MONITOR	((__force ieee80211_rx_result) 2u)
 #define RX_QUEUED	((__force ieee80211_rx_result) 3u)
 
-#define IEEE80211_RX_IN_SCAN		BIT(0)
-/* frame is destined to interface currently processed (incl. multicast frames) */
-#define IEEE80211_RX_RA_MATCH		BIT(1)
-#define IEEE80211_RX_AMSDU		BIT(2)
-#define IEEE80211_RX_FRAGMENTED		BIT(3)
-/* only add flags here that do not change with subframes of an aMPDU */
+/**
+ * enum ieee80211_packet_rx_flags - packet RX flags
+ * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed
+ *	(incl. multicast frames)
+ * @IEEE80211_RX_IN_SCAN: received while scanning
+ * @IEEE80211_RX_FRAGMENTED: fragmented frame
+ * @IEEE80211_RX_AMSDU: a-MSDU packet
+ * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
+ *
+ * These are per-frame flags that are attached to a frame in the
+ * @rx_flags field of &struct ieee80211_rx_status.
+ */
+enum ieee80211_packet_rx_flags {
+	IEEE80211_RX_IN_SCAN			= BIT(0),
+	IEEE80211_RX_RA_MATCH			= BIT(1),
+	IEEE80211_RX_FRAGMENTED			= BIT(2),
+	IEEE80211_RX_AMSDU			= BIT(3),
+	IEEE80211_RX_MALFORMED_ACTION_FRM	= BIT(4),
+};
+
+/**
+ * enum ieee80211_rx_flags - RX data flags
+ *
+ * @IEEE80211_RX_CMNTR: received on cooked monitor already
+ *
+ * These flags are used across handling multiple interfaces
+ * for a single frame.
+ */
+enum ieee80211_rx_flags {
+	IEEE80211_RX_CMNTR	= BIT(0),
+};
 
 struct ieee80211_rx_data {
 	struct sk_buff *skb;
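
Turning the ad-hoc BIT() defines into two documented enums also separates per-packet flags (carried with each received frame) from flags that persist while one frame is handled across several interfaces. A sketch of how an RX handler checks the per-packet variant, assuming the usual `rx->skb` pointer in an RX-path function:

	/* sketch: per-packet flags live in the skb's RX control block,
	 * per the kerneldoc above (@rx_flags of ieee80211_rx_status) */
	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);

	if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
		return RX_DROP_MONITOR;	/* not for us: monitor only */
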
@@ -343,10 +362,14 @@ struct ieee80211_if_managed {
 	unsigned long timers_running; /* used for quiesce/restart */
 	bool powersave; /* powersave requested for this iface */
 	enum ieee80211_smps_mode req_smps, /* requested smps mode */
-			     ap_smps; /* smps mode AP thinks we're in */
+			     ap_smps, /* smps mode AP thinks we're in */
+			     driver_smps_mode; /* smps mode request */
+
+	struct work_struct request_smps_work;
 
 	unsigned int flags;
 
+	bool beacon_crc_valid;
 	u32 beacon_crc;
 
 	enum {
@@ -371,6 +394,13 @@ struct ieee80211_if_managed {
 	int ave_beacon_signal;
 
 	/*
+	 * Number of Beacon frames used in ave_beacon_signal. This can be used
+	 * to avoid generating less reliable cqm events that would be based
+	 * only on couple of received frames.
+	 */
+	unsigned int count_beacon_signal;
+
+	/*
 	 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
 	 * that triggered a cqm event. 0 indicates that no event has been
 	 * generated for the current association.
@@ -474,6 +504,19 @@ enum ieee80211_sub_if_data_flags {
 	IEEE80211_SDATA_DONT_BRIDGE_PACKETS	= BIT(3),
 };
 
+/**
+ * enum ieee80211_sdata_state_bits - virtual interface state bits
+ * @SDATA_STATE_RUNNING: virtual interface is up & running; this
+ *	mirrors netif_running() but is separate for interface type
+ *	change handling while the interface is up
+ * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel
+ *	mode, so queues are stopped
+ */
+enum ieee80211_sdata_state_bits {
+	SDATA_STATE_RUNNING,
+	SDATA_STATE_OFFCHANNEL,
+};
+
 struct ieee80211_sub_if_data {
 	struct list_head list;
 
@@ -487,6 +530,8 @@ struct ieee80211_sub_if_data {
 
 	unsigned int flags;
 
+	unsigned long state;
+
 	int drop_unencrypted;
 
 	char name[IFNAMSIZ];
@@ -497,17 +542,20 @@ struct ieee80211_sub_if_data {
 	 */
 	bool ht_opmode_valid;
 
+	/* to detect idle changes */
+	bool old_idle;
+
 	/* Fragment table for host-based reassembly */
 	struct ieee80211_fragment_entry	fragments[IEEE80211_FRAGMENT_MAX];
 	unsigned int fragment_next;
 
-#define NUM_DEFAULT_KEYS 4
-#define NUM_DEFAULT_MGMT_KEYS 2
 	struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
 	struct ieee80211_key *default_key;
 	struct ieee80211_key *default_mgmt_key;
 
 	u16 sequence_number;
+	__be16 control_port_protocol;
+	bool control_port_no_encrypt;
 
 	struct work_struct work;
 	struct sk_buff_head skb_queue;
@@ -539,6 +587,7 @@ struct ieee80211_sub_if_data {
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct {
 		struct dentry *dir;
+		struct dentry *subdir_stations;
 		struct dentry *default_key;
 		struct dentry *default_mgmt_key;
 	} debugfs;
@@ -595,11 +644,17 @@ enum queue_stop_reason {
  *	determine if we are on the operating channel or not
  * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
  *	gets only set in conjunction with SCAN_SW_SCANNING
+ * @SCAN_COMPLETED: Set for our scan work function when the driver reported
+ *	that the scan completed.
+ * @SCAN_ABORTED: Set for our scan work function when the driver reported
+ *	a scan complete for an aborted scan.
  */
 enum {
 	SCAN_SW_SCANNING,
 	SCAN_HW_SCANNING,
 	SCAN_OFF_CHANNEL,
+	SCAN_COMPLETED,
+	SCAN_ABORTED,
 };
 
 /**
@@ -634,7 +689,6 @@ struct ieee80211_local {
 	/*
 	 * work stuff, potentially off-channel (in the future)
 	 */
-	struct mutex work_mtx;
 	struct list_head work_list;
 	struct timer_list work_timer;
 	struct work_struct work_work;
@@ -653,9 +707,13 @@ struct ieee80211_local {
 	int open_count;
 	int monitors, cooked_mntrs;
 	/* number of interfaces with corresponding FIF_ flags */
-	int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll;
+	int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll,
+	    fif_probe_req;
+	int probe_req_reg;
 	unsigned int filter_flags; /* FIF_* */
 
+	bool wiphy_ciphers_allocated;
+
 	/* protects the aggregated multicast list and filter calls */
 	spinlock_t filter_lock;
 
@@ -746,9 +804,10 @@ struct ieee80211_local {
 	 */
 	struct mutex key_mtx;
 
+	/* mutex for scan and work locking */
+	struct mutex mtx;
 
 	/* Scanning and BSS list */
-	struct mutex scan_mtx;
 	unsigned long scanning;
 	struct cfg80211_ssid scan_ssid;
 	struct cfg80211_scan_request *int_scan_req;
@@ -866,10 +925,14 @@ struct ieee80211_local {
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct local_debugfsdentries {
 		struct dentry *rcdir;
-		struct dentry *stations;
 		struct dentry *keys;
 	} debugfs;
 #endif
+
+	/* dummy netdev for use w/ NAPI */
+	struct net_device napi_dev;
+
+	struct napi_struct napi;
 };
 
 static inline struct ieee80211_sub_if_data *
@@ -1003,6 +1066,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 				  struct sk_buff *skb);
+void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
 
 /* IBSS code */
 void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
@@ -1068,10 +1133,12 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata);
 void ieee80211_remove_interfaces(struct ieee80211_local *local);
 u32 __ieee80211_recalc_idle(struct ieee80211_local *local);
 void ieee80211_recalc_idle(struct ieee80211_local *local);
+void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
+				    const int offset);
 
 static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
 {
-	return netif_running(sdata->dev);
+	return test_bit(SDATA_STATE_RUNNING, &sdata->state);
 }
 
 /* tx handling */
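
Replacing netif_running() with a private SDATA_STATE_RUNNING bit lets mac80211 mark an interface as "not running" during a live type switch while the netdev itself stays up. The two ends of the bit, as used later in iface.c:

	/* sketch: do_open sets the bit last, do_stop clears it first,
	 * so ieee80211_sdata_running() is false for the whole teardown */
	set_bit(SDATA_STATE_RUNNING, &sdata->state);	/* end of do_open */

	clear_bit(SDATA_STATE_RUNNING, &sdata->state);	/* start of do_stop */
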
@@ -1105,12 +1172,13 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 			       enum ieee80211_smps_mode smps, const u8 *da,
 			       const u8 *bssid);
+void ieee80211_request_smps_work(struct work_struct *work);
 
 void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
-				     u16 initiator, u16 reason);
+				     u16 initiator, u16 reason, bool stop);
 void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
-				    u16 initiator, u16 reason);
-void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta);
+				    u16 initiator, u16 reason, bool stop);
+void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx);
 void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
 			     struct sta_info *sta,
 			     struct ieee80211_mgmt *mgmt, size_t len);
@@ -1124,13 +1192,16 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     size_t len);
 
 int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
-				   enum ieee80211_back_parties initiator);
+				   enum ieee80211_back_parties initiator,
+				   bool tx);
 int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
-				    enum ieee80211_back_parties initiator);
+				    enum ieee80211_back_parties initiator,
+				    bool tx);
 void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
 void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
 void ieee80211_ba_session_work(struct work_struct *work);
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
+void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1146,6 +1217,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw);
 
 static inline int __ieee80211_resume(struct ieee80211_hw *hw)
 {
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
+	     "%s: resume with hardware scan still in progress\n",
+	     wiphy_name(hw->wiphy));
+
 	return ieee80211_reconfig(hw_to_local(hw));
 }
 #else
@@ -1208,7 +1285,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 const u8 *key, u8 key_len, u8 key_idx);
 int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 			     const u8 *ie, size_t ie_len,
-			     enum ieee80211_band band);
+			     enum ieee80211_band band, u32 rate_mask,
+			     u8 channel);
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 			      const u8 *ssid, size_t ssid_len,
 			      const u8 *ie, size_t ie_len);
@@ -1221,8 +1299,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
 			    enum ieee80211_band band);
 int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
 			     enum ieee80211_smps_mode smps_mode);
-void ieee80211_recalc_smps(struct ieee80211_local *local,
-			   struct ieee80211_sub_if_data *forsdata);
+void ieee80211_recalc_smps(struct ieee80211_local *local);
 
 size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
 			  const u8 *ids, int n_ids, size_t offset);
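
The new WARN in __ieee80211_resume spells out a driver contract: a hardware scan must have been reported complete before the system suspends. A hedged sketch of the driver side that satisfies it (example_priv and hw_scan_active are hypothetical):

	/* sketch: abort any pending hw scan before suspending */
	static int example_suspend(struct ieee80211_hw *hw)
	{
		struct example_priv *priv = hw->priv;

		if (priv->hw_scan_active)
			ieee80211_scan_completed(hw, true);	/* aborted */

		return 0;
	}
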
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ebbe264e2b0b..7aa85591dbe7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -24,6 +24,7 @@
 #include "led.h"
 #include "driver-ops.h"
 #include "wme.h"
+#include "rate.h"
 
 /**
  * DOC: Interface list locking
@@ -94,21 +95,14 @@ static inline int identical_mac_addr_allowed(int type1, int type2)
 		 type2 == NL80211_IFTYPE_AP_VLAN));
 }
 
-static int ieee80211_open(struct net_device *dev)
+static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
+					    enum nl80211_iftype iftype)
 {
-	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	struct ieee80211_sub_if_data *nsdata;
 	struct ieee80211_local *local = sdata->local;
-	struct sta_info *sta;
-	u32 changed = 0;
-	int res;
-	u32 hw_reconf_flags = 0;
-	u8 null_addr[ETH_ALEN] = {0};
+	struct ieee80211_sub_if_data *nsdata;
+	struct net_device *dev = sdata->dev;
 
-	/* fail early if user set an invalid address */
-	if (compare_ether_addr(dev->dev_addr, null_addr) &&
-	    !is_valid_ether_addr(dev->dev_addr))
-		return -EADDRNOTAVAIL;
+	ASSERT_RTNL();
 
 	/* we hold the RTNL here so can safely walk the list */
 	list_for_each_entry(nsdata, &local->interfaces, list) {
@@ -125,7 +119,7 @@ static int ieee80211_open(struct net_device *dev)
 		 * belonging to the same hardware. Then, however, we're
 		 * faced with having to adopt two different TSF timers...
 		 */
-		if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
+		if (iftype == NL80211_IFTYPE_ADHOC &&
 		    nsdata->vif.type == NL80211_IFTYPE_ADHOC)
 			return -EBUSY;
 
@@ -139,19 +133,56 @@ static int ieee80211_open(struct net_device *dev)
 		/*
 		 * check whether it may have the same address
 		 */
-		if (!identical_mac_addr_allowed(sdata->vif.type,
+		if (!identical_mac_addr_allowed(iftype,
 						nsdata->vif.type))
 			return -ENOTUNIQ;
 
 		/*
 		 * can only add VLANs to enabled APs
 		 */
-		if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+		if (iftype == NL80211_IFTYPE_AP_VLAN &&
 		    nsdata->vif.type == NL80211_IFTYPE_AP)
 			sdata->bss = &nsdata->u.ap;
 		}
 	}
 
+	return 0;
+}
+
+void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
+				    const int offset)
+{
+	struct ieee80211_local *local = sdata->local;
+	u32 flags = sdata->u.mntr_flags;
+
+#define ADJUST(_f, _s)	do {					\
+	if (flags & MONITOR_FLAG_##_f)				\
+		local->fif_##_s += offset;			\
+	} while (0)
+
+	ADJUST(FCSFAIL, fcsfail);
+	ADJUST(PLCPFAIL, plcpfail);
+	ADJUST(CONTROL, control);
+	ADJUST(CONTROL, pspoll);
+	ADJUST(OTHER_BSS, other_bss);
+
+#undef ADJUST
+}
+
+/*
+ * NOTE: Be very careful when changing this function, it must NOT return
+ * an error on interface type changes that have been pre-checked, so most
+ * checks should be in ieee80211_check_concurrent_iface.
+ */
+static int ieee80211_do_open(struct net_device *dev, bool coming_up)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *sta;
+	u32 changed = 0;
+	int res;
+	u32 hw_reconf_flags = 0;
+
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_WDS:
 		if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
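
The ADJUST() helper folds ten hand-written increment/decrement branches into one table; note that CONTROL intentionally bumps both fif_control and fif_pspoll. Expanded by the preprocessor, a single invocation is just:

	/* sketch: what ADJUST(FCSFAIL, fcsfail) expands to */
	do {
		if (flags & MONITOR_FLAG_FCSFAIL)
			local->fif_fcsfail += offset;
	} while (0);

Calling it with offset +1 on open and -1 on stop keeps the filter reference counts symmetric by construction.
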
@@ -177,7 +208,9 @@ static int ieee80211_open(struct net_device *dev)
 		/* no special treatment */
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
-	case __NL80211_IFTYPE_AFTER_LAST:
+	case NUM_NL80211_IFTYPES:
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_P2P_GO:
 		/* cannot happen */
 		WARN_ON(1);
 		break;
@@ -187,39 +220,30 @@ static int ieee80211_open(struct net_device *dev)
 		res = drv_start(local);
 		if (res)
 			goto err_del_bss;
+		if (local->ops->napi_poll)
+			napi_enable(&local->napi);
 		/* we're brought up, everything changes */
 		hw_reconf_flags = ~0;
 		ieee80211_led_radio(local, true);
 	}
 
 	/*
-	 * Check all interfaces and copy the hopefully now-present
-	 * MAC address to those that have the special null one.
+	 * Copy the hopefully now-present MAC address to
+	 * this interface, if it has the special null one.
 	 */
-	list_for_each_entry(nsdata, &local->interfaces, list) {
-		struct net_device *ndev = nsdata->dev;
-
-		/*
-		 * No need to check running since we do not allow
-		 * it to start up with this invalid address.
-		 */
-		if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) {
-			memcpy(ndev->dev_addr,
-			       local->hw.wiphy->perm_addr,
-			       ETH_ALEN);
-			memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN);
+	if (is_zero_ether_addr(dev->dev_addr)) {
+		memcpy(dev->dev_addr,
+		       local->hw.wiphy->perm_addr,
+		       ETH_ALEN);
+		memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN);
+
+		if (!is_valid_ether_addr(dev->dev_addr)) {
+			if (!local->open_count)
+				drv_stop(local);
+			return -EADDRNOTAVAIL;
 		}
 	}
 
-	/*
-	 * Validate the MAC address for this device.
-	 */
-	if (!is_valid_ether_addr(dev->dev_addr)) {
-		if (!local->open_count)
-			drv_stop(local);
-		return -EADDRNOTAVAIL;
-	}
-
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP_VLAN:
 		/* no need to tell driver */
@@ -237,25 +261,17 @@ static int ieee80211_open(struct net_device *dev)
 			hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR;
 		}
 
-		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
-			local->fif_fcsfail++;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
-			local->fif_plcpfail++;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) {
-			local->fif_control++;
-			local->fif_pspoll++;
-		}
-		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
-			local->fif_other_bss++;
-
+		ieee80211_adjust_monitor_flags(sdata, 1);
 		ieee80211_configure_filter(local);
 
 		netif_carrier_on(dev);
 		break;
 	default:
-		res = drv_add_interface(local, &sdata->vif);
-		if (res)
-			goto err_stop;
+		if (coming_up) {
+			res = drv_add_interface(local, &sdata->vif);
+			if (res)
+				goto err_stop;
+		}
 
 		if (ieee80211_vif_is_mesh(&sdata->vif)) {
 			local->fif_other_bss++;
@@ -264,8 +280,11 @@ static int ieee80211_open(struct net_device *dev)
 			ieee80211_start_mesh(sdata);
 		} else if (sdata->vif.type == NL80211_IFTYPE_AP) {
 			local->fif_pspoll++;
+			local->fif_probe_req++;
 
 			ieee80211_configure_filter(local);
+		} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
+			local->fif_probe_req++;
 		}
 
 		changed |= ieee80211_reset_erp_info(sdata);
@@ -277,6 +296,8 @@ static int ieee80211_open(struct net_device *dev)
 		netif_carrier_on(dev);
 	}
 
+	set_bit(SDATA_STATE_RUNNING, &sdata->state);
+
 	if (sdata->vif.type == NL80211_IFTYPE_WDS) {
 		/* Create STA entry for the WDS peer */
 		sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
@@ -294,6 +315,8 @@ static int ieee80211_open(struct net_device *dev)
 			/* STA has been freed */
 			goto err_del_interface;
 		}
+
+		rate_control_rate_init(sta);
 	}
 
 	/*
@@ -307,9 +330,13 @@ static int ieee80211_open(struct net_device *dev)
 	if (sdata->flags & IEEE80211_SDATA_PROMISC)
 		atomic_inc(&local->iff_promiscs);
 
+	mutex_lock(&local->mtx);
 	hw_reconf_flags |= __ieee80211_recalc_idle(local);
+	mutex_unlock(&local->mtx);
+
+	if (coming_up)
+		local->open_count++;
 
-	local->open_count++;
 	if (hw_reconf_flags) {
 		ieee80211_hw_config(local, hw_reconf_flags);
 		/*
@@ -334,22 +361,45 @@ static int ieee80211_open(struct net_device *dev)
 	sdata->bss = NULL;
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		list_del(&sdata->u.vlan.list);
+	clear_bit(SDATA_STATE_RUNNING, &sdata->state);
 	return res;
 }
 
-static int ieee80211_stop(struct net_device *dev)
+static int ieee80211_open(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	int err;
+
+	/* fail early if user set an invalid address */
+	if (!is_zero_ether_addr(dev->dev_addr) &&
+	    !is_valid_ether_addr(dev->dev_addr))
+		return -EADDRNOTAVAIL;
+
+	err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type);
+	if (err)
+		return err;
+
+	return ieee80211_do_open(dev, true);
+}
+
+static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
+			      bool going_down)
+{
 	struct ieee80211_local *local = sdata->local;
 	unsigned long flags;
 	struct sk_buff *skb, *tmp;
 	u32 hw_reconf_flags = 0;
 	int i;
 
+	if (local->scan_sdata == sdata)
+		ieee80211_scan_cancel(local);
+
+	clear_bit(SDATA_STATE_RUNNING, &sdata->state);
+
 	/*
 	 * Stop TX on this interface first.
 	 */
-	netif_tx_stop_all_queues(dev);
+	netif_tx_stop_all_queues(sdata->dev);
 
 	/*
 	 * Purge work for this interface.
@@ -366,12 +416,9 @@ static int ieee80211_stop(struct net_device *dev)
 	 * (because if we remove a STA after ops->remove_interface()
 	 * the driver will have removed the vif info already!)
 	 *
-	 * We could relax this and only unlink the stations from the
-	 * hash table and list but keep them on a per-sdata list that
-	 * will be inserted back again when the interface is brought
-	 * up again, but I don't currently see a use case for that,
-	 * except with WDS which gets a STA entry created when it is
-	 * brought up.
+	 * This is relevant only in AP, WDS and mesh modes, since in
+	 * all other modes we've already removed all stations when
+	 * disconnecting etc.
 	 */
 	sta_info_flush(local, sdata);
 
@@ -387,14 +434,19 @@ static int ieee80211_stop(struct net_device *dev)
 	if (sdata->flags & IEEE80211_SDATA_PROMISC)
 		atomic_dec(&local->iff_promiscs);
 
-	if (sdata->vif.type == NL80211_IFTYPE_AP)
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		local->fif_pspoll--;
+		local->fif_probe_req--;
+	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
+		local->fif_probe_req--;
+	}
 
-	netif_addr_lock_bh(dev);
+	netif_addr_lock_bh(sdata->dev);
 	spin_lock_bh(&local->filter_lock);
-	__hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len);
+	__hw_addr_unsync(&local->mc_list, &sdata->dev->mc,
+			 sdata->dev->addr_len);
 	spin_unlock_bh(&local->filter_lock);
-	netif_addr_unlock_bh(dev);
+	netif_addr_unlock_bh(sdata->dev);
 
 	ieee80211_configure_filter(local);
 
@@ -406,11 +458,21 @@ static int ieee80211_stop(struct net_device *dev)
 		struct ieee80211_sub_if_data *vlan, *tmpsdata;
 		struct beacon_data *old_beacon = sdata->u.ap.beacon;
 
+		/* sdata_running will return false, so this will disable */
+		ieee80211_bss_info_change_notify(sdata,
+						 BSS_CHANGED_BEACON_ENABLED);
+
 		/* remove beacon */
 		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
 		synchronize_rcu();
 		kfree(old_beacon);
 
+		/* free all potentially still buffered bcast frames */
+		while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
+			local->total_ps_buffered--;
+			dev_kfree_skb(skb);
+		}
+
 		/* down all dependent devices, that is VLANs */
 		list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
 					 u.vlan.list)
@@ -418,7 +480,8 @@ static int ieee80211_stop(struct net_device *dev)
 		WARN_ON(!list_empty(&sdata->u.ap.vlans));
 	}
 
-	local->open_count--;
+	if (going_down)
+		local->open_count--;
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP_VLAN:
@@ -437,40 +500,9 @@ static int ieee80211_stop(struct net_device *dev)
 			hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR;
 		}
 
-		if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL)
-			local->fif_fcsfail--;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
-			local->fif_plcpfail--;
-		if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) {
-			local->fif_pspoll--;
-			local->fif_control--;
-		}
-		if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
-			local->fif_other_bss--;
-
+		ieee80211_adjust_monitor_flags(sdata, -1);
 		ieee80211_configure_filter(local);
 		break;
-	case NL80211_IFTYPE_STATION:
-		del_timer_sync(&sdata->u.mgd.chswitch_timer);
-		del_timer_sync(&sdata->u.mgd.timer);
-		del_timer_sync(&sdata->u.mgd.conn_mon_timer);
-		del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
-		/*
-		 * If any of the timers fired while we waited for it, it will
-		 * have queued its work. Now the work will be running again
-		 * but will not rearm the timer again because it checks
-		 * whether the interface is running, which, at this point,
-		 * it no longer is.
-		 */
-		cancel_work_sync(&sdata->u.mgd.chswitch_work);
-		cancel_work_sync(&sdata->u.mgd.monitor_work);
-		cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
-
-		/* fall through */
-	case NL80211_IFTYPE_ADHOC:
-		if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
-			del_timer_sync(&sdata->u.ibss.timer);
-		/* fall through */
 	case NL80211_IFTYPE_MESH_POINT:
 		if (ieee80211_vif_is_mesh(&sdata->vif)) {
 			/* other_bss and allmulti are always set on mesh
@@ -494,31 +526,35 @@ static int ieee80211_stop(struct net_device *dev)
 	synchronize_rcu();
 	skb_queue_purge(&sdata->skb_queue);
 
-	if (local->scan_sdata == sdata)
-		ieee80211_scan_cancel(local);
-
 	/*
-	 * Disable beaconing for AP and mesh, IBSS can't
-	 * still be joined to a network at this point.
+	 * Disable beaconing here for mesh only, AP and IBSS
+	 * are already taken care of.
 	 */
-	if (sdata->vif.type == NL80211_IFTYPE_AP ||
-	    sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
+	if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
 		ieee80211_bss_info_change_notify(sdata,
 			BSS_CHANGED_BEACON_ENABLED);
-	}
 
-	/* free all remaining keys, there shouldn't be any */
+	/*
+	 * Free all remaining keys, there shouldn't be any,
+	 * except maybe group keys in AP more or WDS?
+	 */
 	ieee80211_free_keys(sdata);
-	drv_remove_interface(local, &sdata->vif);
+
+	if (going_down)
+		drv_remove_interface(local, &sdata->vif);
 	}
 
 	sdata->bss = NULL;
 
+	mutex_lock(&local->mtx);
 	hw_reconf_flags |= __ieee80211_recalc_idle(local);
+	mutex_unlock(&local->mtx);
 
 	ieee80211_recalc_ps(local, -1);
 
 	if (local->open_count == 0) {
+		if (local->ops->napi_poll)
+			napi_disable(&local->napi);
 		ieee80211_clear_tx_pending(local);
 		ieee80211_stop_device(local);
 
@@ -541,6 +577,13 @@ static int ieee80211_stop(struct net_device *dev)
 		}
 	}
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+}
+
+static int ieee80211_stop(struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	ieee80211_do_stop(sdata, true);
 
 	return 0;
 }
@@ -585,8 +628,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
-	struct beacon_data *beacon;
-	struct sk_buff *skb;
 	int flushed;
 	int i;
 
@@ -599,37 +640,8 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
 		__skb_queue_purge(&sdata->fragments[i].skb_list);
 	sdata->fragment_next = 0;
 
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_AP:
-		beacon = sdata->u.ap.beacon;
-		rcu_assign_pointer(sdata->u.ap.beacon, NULL);
-		synchronize_rcu();
-		kfree(beacon);
-
-		while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
-			local->total_ps_buffered--;
-			dev_kfree_skb(skb);
-		}
-
-		break;
-	case NL80211_IFTYPE_MESH_POINT:
-		if (ieee80211_vif_is_mesh(&sdata->vif))
-			mesh_rmc_free(sdata);
-		break;
-	case NL80211_IFTYPE_ADHOC:
-		if (WARN_ON(sdata->u.ibss.presp))
-			kfree_skb(sdata->u.ibss.presp);
-		break;
-	case NL80211_IFTYPE_STATION:
-	case NL80211_IFTYPE_WDS:
-	case NL80211_IFTYPE_AP_VLAN:
-	case NL80211_IFTYPE_MONITOR:
-		break;
-	case NL80211_IFTYPE_UNSPECIFIED:
-	case __NL80211_IFTYPE_AFTER_LAST:
-		BUG();
-		break;
-	}
+	if (ieee80211_vif_is_mesh(&sdata->vif))
+		mesh_rmc_free(sdata);
 
 	flushed = sta_info_flush(local, sdata);
 	WARN_ON(flushed);
@@ -791,7 +803,8 @@ static void ieee80211_iface_work(struct work_struct *work)
 
 			__ieee80211_stop_rx_ba_session(
 				sta, tid, WLAN_BACK_RECIPIENT,
-				WLAN_REASON_QSTA_REQUIRE_SETUP);
+				WLAN_REASON_QSTA_REQUIRE_SETUP,
+				true);
 		}
 		mutex_unlock(&local->sta_mtx);
 	} else switch (sdata->vif.type) {
@@ -844,9 +857,13 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 
 	/* and set some type-dependent values */
 	sdata->vif.type = type;
+	sdata->vif.p2p = false;
 	sdata->dev->netdev_ops = &ieee80211_dataif_ops;
 	sdata->wdev.iftype = type;
 
+	sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
+	sdata->control_port_no_encrypt = false;
+
 	/* only monitor differs */
 	sdata->dev->type = ARPHRD_ETHER;
 
@@ -854,10 +871,20 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	INIT_WORK(&sdata->work, ieee80211_iface_work);
 
 	switch (type) {
+	case NL80211_IFTYPE_P2P_GO:
+		type = NL80211_IFTYPE_AP;
+		sdata->vif.type = type;
+		sdata->vif.p2p = true;
+		/* fall through */
 	case NL80211_IFTYPE_AP:
 		skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
 		INIT_LIST_HEAD(&sdata->u.ap.vlans);
 		break;
+	case NL80211_IFTYPE_P2P_CLIENT:
+		type = NL80211_IFTYPE_STATION;
+		sdata->vif.type = type;
+		sdata->vif.p2p = true;
+		/* fall through */
 	case NL80211_IFTYPE_STATION:
 		ieee80211_sta_setup_sdata(sdata);
 		break;
@@ -878,7 +905,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	case NL80211_IFTYPE_AP_VLAN:
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
-	case __NL80211_IFTYPE_AFTER_LAST:
+	case NUM_NL80211_IFTYPES:
 		BUG();
 		break;
 	}
@@ -886,12 +913,85 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	ieee80211_debugfs_add_netdev(sdata);
 }
 
+static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
+					   enum nl80211_iftype type)
+{
+	struct ieee80211_local *local = sdata->local;
+	int ret, err;
+	enum nl80211_iftype internal_type = type;
+	bool p2p = false;
+
+	ASSERT_RTNL();
+
+	if (!local->ops->change_interface)
+		return -EBUSY;
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
+		/*
+		 * Could maybe also all others here?
+		 * Just not sure how that interacts
+		 * with the RX/config path e.g. for
+		 * mesh.
+		 */
+		break;
+	default:
+		return -EBUSY;
+	}
+
+	switch (type) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_ADHOC:
+		/*
+		 * Could probably support everything
+		 * but WDS here (WDS do_open can fail
+		 * under memory pressure, which this
+		 * code isn't prepared to handle).
+		 */
+		break;
+	case NL80211_IFTYPE_P2P_CLIENT:
+		p2p = true;
+		internal_type = NL80211_IFTYPE_STATION;
+		break;
+	case NL80211_IFTYPE_P2P_GO:
+		p2p = true;
+		internal_type = NL80211_IFTYPE_AP;
+		break;
+	default:
+		return -EBUSY;
+	}
+
+	ret = ieee80211_check_concurrent_iface(sdata, internal_type);
+	if (ret)
+		return ret;
+
+	ieee80211_do_stop(sdata, false);
+
+	ieee80211_teardown_sdata(sdata->dev);
+
+	ret = drv_change_interface(local, sdata, internal_type, p2p);
+	if (ret)
+		type = sdata->vif.type;
+
+	ieee80211_setup_sdata(sdata, type);
+
+	err = ieee80211_do_open(sdata->dev, false);
+	WARN(err, "type change: do_open returned %d", err);
+
+	return ret;
+}
+
 int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 			     enum nl80211_iftype type)
 {
+	int ret;
+
 	ASSERT_RTNL();
 
-	if (type == sdata->vif.type)
+	if (type == ieee80211_vif_type_p2p(&sdata->vif))
 		return 0;
 
 	/* Setting ad-hoc mode on non-IBSS channel is not supported. */
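
Runtime type changes only happen when the driver advertises a change_interface callback; drivers that cannot reprogram a vif in place leave it NULL and callers get -EBUSY as before. A hedged sketch of the driver side, assuming the callback signature drv_change_interface() wraps here (example_* names are hypothetical):

	/* sketch: called between ieee80211_do_stop() and
	 * ieee80211_do_open(); mac80211 itself updates vif->type
	 * and vif->p2p afterwards in ieee80211_setup_sdata() */
	static int example_change_interface(struct ieee80211_hw *hw,
					    struct ieee80211_vif *vif,
					    enum nl80211_iftype new_type,
					    bool p2p)
	{
		struct example_priv *priv = hw->priv;

		/* hypothetical: reprogram device filters/templates */
		return example_reprogram_vif(priv, new_type, p2p);
	}
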
@@ -899,18 +999,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 	    type == NL80211_IFTYPE_ADHOC)
 		return -EOPNOTSUPP;
 
-	/*
-	 * We could, here, on changes between IBSS/STA/MESH modes,
-	 * invoke an MLME function instead that disassociates etc.
-	 * and goes into the requested mode.
-	 */
-
-	if (ieee80211_sdata_running(sdata))
-		return -EBUSY;
-
-	/* Purge and reset type-dependent state. */
-	ieee80211_teardown_sdata(sdata->dev);
-	ieee80211_setup_sdata(sdata, type);
+	if (ieee80211_sdata_running(sdata)) {
+		ret = ieee80211_runtime_change_iftype(sdata, type);
+		if (ret)
+			return ret;
+	} else {
+		/* Purge and reset type-dependent state. */
+		ieee80211_teardown_sdata(sdata->dev);
+		ieee80211_setup_sdata(sdata, type);
+	}
 
 	/* reset some values that shouldn't be kept across type changes */
 	sdata->vif.bss_conf.basic_rates =
@@ -1167,8 +1264,7 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local,
 		return 0;
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: device no longer idle - %s\n",
-	       wiphy_name(local->hw.wiphy), reason);
+	wiphy_debug(local->hw.wiphy, "device no longer idle - %s\n", reason);
 #endif
 
 	local->hw.conf.flags &= ~IEEE80211_CONF_IDLE;
@@ -1181,8 +1277,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local)
 		return 0;
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: device now idle\n",
-	       wiphy_name(local->hw.wiphy));
+	wiphy_debug(local->hw.wiphy, "device now idle\n");
 #endif
 
 	drv_flush(local, false);
@@ -1195,28 +1290,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
 	int count = 0;
+	bool working = false, scanning = false;
+	struct ieee80211_work *wk;
 
-	if (!list_empty(&local->work_list))
-		return ieee80211_idle_off(local, "working");
-
-	if (local->scanning)
-		return ieee80211_idle_off(local, "scanning");
+#ifdef CONFIG_PROVE_LOCKING
+	WARN_ON(debug_locks && !lockdep_rtnl_is_held() &&
+		!lockdep_is_held(&local->iflist_mtx));
+#endif
+	lockdep_assert_held(&local->mtx);
 
 	list_for_each_entry(sdata, &local->interfaces, list) {
-		if (!ieee80211_sdata_running(sdata))
+		if (!ieee80211_sdata_running(sdata)) {
+			sdata->vif.bss_conf.idle = true;
 			continue;
+		}
+
+		sdata->old_idle = sdata->vif.bss_conf.idle;
+
 		/* do not count disabled managed interfaces */
 		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
-		    !sdata->u.mgd.associated)
+		    !sdata->u.mgd.associated) {
+			sdata->vif.bss_conf.idle = true;
 			continue;
+		}
 		/* do not count unused IBSS interfaces */
 		if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
-		    !sdata->u.ibss.ssid_len)
+		    !sdata->u.ibss.ssid_len) {
+			sdata->vif.bss_conf.idle = true;
 			continue;
+		}
 		/* count everything else */
 		count++;
 	}
 
+	list_for_each_entry(wk, &local->work_list, list) {
+		working = true;
+		wk->sdata->vif.bss_conf.idle = false;
+	}
+
+	if (local->scan_sdata) {
+		scanning = true;
+		local->scan_sdata->vif.bss_conf.idle = false;
+	}
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata->old_idle == sdata->vif.bss_conf.idle)
+			continue;
+		if (!ieee80211_sdata_running(sdata))
+			continue;
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
+	}
+
+	if (working)
+		return ieee80211_idle_off(local, "working");
+	if (scanning)
+		return ieee80211_idle_off(local, "scanning");
 	if (!count)
 		return ieee80211_idle_on(local);
 	else
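
The rewritten __ieee80211_recalc_idle no longer returns early for work or scanning; it first computes a per-vif bss_conf.idle value and then notifies only the interfaces whose idle state actually flipped, via BSS_CHANGED_IDLE. A sketch of the driver-side consumer (example_set_vif_power_state is hypothetical):

	/* sketch: reacting to the new per-vif idle notification */
	static void example_bss_info_changed(struct ieee80211_hw *hw,
					     struct ieee80211_vif *vif,
					     struct ieee80211_bss_conf *info,
					     u32 changed)
	{
		if (changed & BSS_CHANGED_IDLE)
			example_set_vif_power_state(hw->priv, vif,
						    info->idle);
	}
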
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 1b9d87ed143a..ccd676b2f599 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -49,7 +49,7 @@ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
 
 static void assert_key_lock(struct ieee80211_local *local)
 {
-	WARN_ON(!mutex_is_locked(&local->key_mtx));
+	lockdep_assert_held(&local->key_mtx);
 }
 
 static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
@@ -60,7 +60,7 @@ static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
 	return NULL;
 }
 
-static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
+static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_sta *sta;
@@ -69,12 +69,20 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 	might_sleep();
 
 	if (!key->local->ops->set_key)
-		return;
+		goto out_unsupported;
 
 	assert_key_lock(key->local);
 
 	sta = get_sta_for_key(key);
 
+	/*
+	 * If this is a per-STA GTK, check if it
+	 * is supported; if not, return.
+	 */
+	if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) &&
+	    !(key->local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK))
+		goto out_unsupported;
+
 	sdata = key->sdata;
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		sdata = container_of(sdata->bss,
@@ -83,14 +91,28 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 
 	ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
 
-	if (!ret)
+	if (!ret) {
 		key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
+		return 0;
+	}
 
-	if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP)
-		printk(KERN_ERR "mac80211-%s: failed to set key "
-		       "(%d, %pM) to hardware (%d)\n",
-		       wiphy_name(key->local->hw.wiphy),
-		       key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
+	if (ret != -ENOSPC && ret != -EOPNOTSUPP)
+		wiphy_err(key->local->hw.wiphy,
+			  "failed to set key (%d, %pM) to hardware (%d)\n",
+			  key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
+
+ out_unsupported:
+	switch (key->conf.cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
+	case WLAN_CIPHER_SUITE_TKIP:
+	case WLAN_CIPHER_SUITE_CCMP:
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		/* all of these we can do in software */
+		return 0;
+	default:
+		return -EINVAL;
+	}
 }
 
 static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
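
With the void-to-int conversion, enable_hw_accel now reports whether a key is usable at all: 0 means either hardware took it or software crypto can cover the cipher, and -EINVAL means neither. A sketch of the calling pattern this enables (the exact caller in key.c may differ; the error handling is the point):

	/* sketch: callers can now fail key installation cleanly
	 * instead of silently keeping an unusable key around */
	err = ieee80211_key_enable_hw_accel(key);
	if (err)
		return err;
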
@@ -121,14 +143,33 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
121 sta, &key->conf); 143 sta, &key->conf);
122 144
123 if (ret) 145 if (ret)
124 printk(KERN_ERR "mac80211-%s: failed to remove key " 146 wiphy_err(key->local->hw.wiphy,
125 "(%d, %pM) from hardware (%d)\n", 147 "failed to remove key (%d, %pM) from hardware (%d)\n",
126 wiphy_name(key->local->hw.wiphy), 148 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
127 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
128 149
129 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 150 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
130} 151}
131 152
153void ieee80211_key_removed(struct ieee80211_key_conf *key_conf)
154{
155 struct ieee80211_key *key;
156
157 key = container_of(key_conf, struct ieee80211_key, conf);
158
159 might_sleep();
160 assert_key_lock(key->local);
161
162 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
163
164 /*
165 * Flush TX path to avoid attempts to use this key
166 * after this function returns. Until then, drivers
167 * must be prepared to handle the key.
168 */
169 synchronize_rcu();
170}
171EXPORT_SYMBOL_GPL(ieee80211_key_removed);
172
132static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, 173static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
133 int idx) 174 int idx)
134{ 175{
@@ -184,6 +225,7 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
184 225
185static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, 226static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
186 struct sta_info *sta, 227 struct sta_info *sta,
228 bool pairwise,
187 struct ieee80211_key *old, 229 struct ieee80211_key *old,
188 struct ieee80211_key *new) 230 struct ieee80211_key *new)
189{ 231{
@@ -192,8 +234,14 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
192 if (new) 234 if (new)
193 list_add(&new->list, &sdata->key_list); 235 list_add(&new->list, &sdata->key_list);
194 236
195 if (sta) { 237 if (sta && pairwise) {
196 rcu_assign_pointer(sta->key, new); 238 rcu_assign_pointer(sta->ptk, new);
239 } else if (sta) {
240 if (old)
241 idx = old->conf.keyidx;
242 else
243 idx = new->conf.keyidx;
244 rcu_assign_pointer(sta->gtk[idx], new);
197 } else { 245 } else {
198 WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); 246 WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);
199 247
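Note on the hunk above: splitting the single sta->key pointer into sta->ptk plus a small sta->gtk[] array lets the RX path pick the right key without taking locks, because writers publish with rcu_assign_pointer() and readers look the key up under rcu_read_lock(). A minimal reader-side sketch (simplified struct layout; only the ptk/gtk fields mirror the real sta_info):

    #include <linux/rcupdate.h>

    /* Simplified stand-in for struct sta_info; the real one has many
     * more fields. Array size follows NUM_DEFAULT_KEYS +
     * NUM_DEFAULT_MGMT_KEYS from key.h. */
    struct sta_keys_sketch {
    	struct ieee80211_key __rcu *ptk;         /* pairwise (unicast) key */
    	struct ieee80211_key __rcu *gtk[4 + 2];  /* per-STA group keys */
    };

    /* Caller holds rcu_read_lock(); counterpart of the publication done
     * with rcu_assign_pointer() in __ieee80211_key_replace() above. */
    static struct ieee80211_key *
    sketch_rx_key(struct sta_keys_sketch *sta, bool unicast, unsigned int keyidx)
    {
    	if (unicast)
    		return rcu_dereference(sta->ptk);
    	return rcu_dereference(sta->gtk[keyidx]);
    }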
@@ -227,20 +275,18 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 	}
 }
 
-struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
-					  int idx,
-					  size_t key_len,
+struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 					  const u8 *key_data,
 					  size_t seq_len, const u8 *seq)
 {
 	struct ieee80211_key *key;
-	int i, j;
+	int i, j, err;
 
 	BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
 
 	key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
 	if (!key)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	/*
 	 * Default to software encryption; we'll later upload the
@@ -249,15 +295,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 	key->conf.flags = 0;
 	key->flags = 0;
 
-	key->conf.alg = alg;
+	key->conf.cipher = cipher;
 	key->conf.keyidx = idx;
 	key->conf.keylen = key_len;
-	switch (alg) {
-	case ALG_WEP:
+	switch (cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
 		key->conf.iv_len = WEP_IV_LEN;
 		key->conf.icv_len = WEP_ICV_LEN;
 		break;
-	case ALG_TKIP:
+	case WLAN_CIPHER_SUITE_TKIP:
 		key->conf.iv_len = TKIP_IV_LEN;
 		key->conf.icv_len = TKIP_ICV_LEN;
 		if (seq) {
@@ -269,7 +316,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 			}
 		}
 		break;
-	case ALG_CCMP:
+	case WLAN_CIPHER_SUITE_CCMP:
 		key->conf.iv_len = CCMP_HDR_LEN;
 		key->conf.icv_len = CCMP_MIC_LEN;
 		if (seq) {
@@ -278,42 +325,38 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
 				key->u.ccmp.rx_pn[i][j] =
 					seq[CCMP_PN_LEN - j - 1];
 		}
-		break;
-	case ALG_AES_CMAC:
-		key->conf.iv_len = 0;
-		key->conf.icv_len = sizeof(struct ieee80211_mmie);
-		if (seq)
-			for (j = 0; j < 6; j++)
-				key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
-		break;
-	}
-	memcpy(key->conf.key, key_data, key_len);
-	INIT_LIST_HEAD(&key->list);
-
-	if (alg == ALG_CCMP) {
 		/*
 		 * Initialize AES key state here as an optimization so that
 		 * it does not need to be initialized for every packet.
 		 */
 		key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data);
-		if (!key->u.ccmp.tfm) {
+		if (IS_ERR(key->u.ccmp.tfm)) {
+			err = PTR_ERR(key->u.ccmp.tfm);
 			kfree(key);
-			return NULL;
+			key = ERR_PTR(err);
 		}
-	}
-
-	if (alg == ALG_AES_CMAC) {
+		break;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		key->conf.iv_len = 0;
+		key->conf.icv_len = sizeof(struct ieee80211_mmie);
+		if (seq)
+			for (j = 0; j < 6; j++)
+				key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
 		/*
 		 * Initialize AES key state here as an optimization so that
 		 * it does not need to be initialized for every packet.
 		 */
 		key->u.aes_cmac.tfm =
 			ieee80211_aes_cmac_key_setup(key_data);
-		if (!key->u.aes_cmac.tfm) {
+		if (IS_ERR(key->u.aes_cmac.tfm)) {
+			err = PTR_ERR(key->u.aes_cmac.tfm);
 			kfree(key);
-			return NULL;
+			key = ERR_PTR(err);
 		}
+		break;
 	}
+	memcpy(key->conf.key, key_data, key_len);
+	INIT_LIST_HEAD(&key->list);
 
 	return key;
 }
@@ -326,9 +369,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
 	if (key->local)
 		ieee80211_key_disable_hw_accel(key);
 
-	if (key->conf.alg == ALG_CCMP)
+	if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
 		ieee80211_aes_key_free(key->u.ccmp.tfm);
-	if (key->conf.alg == ALG_AES_CMAC)
+	if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
 		ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
 	if (key->local)
 		ieee80211_debugfs_key_remove(key);
@@ -336,12 +379,13 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
 	kfree(key);
 }
 
-void ieee80211_key_link(struct ieee80211_key *key,
+int ieee80211_key_link(struct ieee80211_key *key,
 			struct ieee80211_sub_if_data *sdata,
 			struct sta_info *sta)
 {
 	struct ieee80211_key *old_key;
-	int idx;
+	int idx, ret;
+	bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
 
 	BUG_ON(!sdata);
 	BUG_ON(!key);
@@ -358,13 +402,6 @@ void ieee80211_key_link(struct ieee80211_key *key,
 		 */
 		if (test_sta_flags(sta, WLAN_STA_WME))
 			key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
-
-		/*
-		 * This key is for a specific sta interface,
-		 * inform the driver that it should try to store
-		 * this key as pairwise key.
-		 */
-		key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
 	} else {
 		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 			struct sta_info *ap;
@@ -386,19 +423,23 @@ void ieee80211_key_link(struct ieee80211_key *key,
 
 	mutex_lock(&sdata->local->key_mtx);
 
-	if (sta)
-		old_key = sta->key;
+	if (sta && pairwise)
+		old_key = sta->ptk;
+	else if (sta)
+		old_key = sta->gtk[idx];
 	else
 		old_key = sdata->keys[idx];
 
-	__ieee80211_key_replace(sdata, sta, old_key, key);
+	__ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
 	__ieee80211_key_destroy(old_key);
 
 	ieee80211_debugfs_key_add(key);
 
-	ieee80211_key_enable_hw_accel(key);
+	ret = ieee80211_key_enable_hw_accel(key);
 
 	mutex_unlock(&sdata->local->key_mtx);
+
+	return ret;
 }
 
 static void __ieee80211_key_free(struct ieee80211_key *key)
@@ -408,7 +449,8 @@ static void __ieee80211_key_free(struct ieee80211_key *key)
 	 */
 	if (key->sdata)
 		__ieee80211_key_replace(key->sdata, key->sta,
-					key, NULL);
+					key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+					key, NULL);
 	__ieee80211_key_destroy(key);
 }
 
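With ieee80211_key_alloc() now returning ERR_PTR() values instead of NULL, callers can distinguish -ENOMEM from a crypto-layer setup failure using the standard <linux/err.h> helpers. A minimal caller-side sketch (illustrative only; the real caller of this function lives in net/mac80211/cfg.c):

    #include <linux/err.h>

    static int sketch_add_key(u32 cipher, int idx, size_t len, const u8 *data)
    {
    	struct ieee80211_key *key;

    	key = ieee80211_key_alloc(cipher, idx, len, data, 0, NULL);
    	if (IS_ERR(key))
    		return PTR_ERR(key);    /* -ENOMEM, or the tfm setup error */

    	/* ... hand off with ieee80211_key_link(key, sdata, sta) ... */
    	return 0;
    }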
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index b665bbb7a471..0db1c0f5f697 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -16,6 +16,9 @@
 #include <linux/rcupdate.h>
 #include <net/mac80211.h>
 
+#define NUM_DEFAULT_KEYS 4
+#define NUM_DEFAULT_MGMT_KEYS 2
+
 #define WEP_IV_LEN		4
 #define WEP_ICV_LEN		4
 #define ALG_TKIP_KEY_LEN	32
@@ -123,18 +126,16 @@ struct ieee80211_key {
 	struct ieee80211_key_conf conf;
 };
 
-struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
-					  int idx,
-					  size_t key_len,
+struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 					  const u8 *key_data,
 					  size_t seq_len, const u8 *seq);
 /*
  * Insert a key into data structures (sdata, sta if necessary)
  * to make it used, free old key.
  */
-void ieee80211_key_link(struct ieee80211_key *key,
+int __must_check ieee80211_key_link(struct ieee80211_key *key,
 			struct ieee80211_sub_if_data *sdata,
 			struct sta_info *sta);
 void ieee80211_key_free(struct ieee80211_local *local,
 			struct ieee80211_key *key);
 void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ded5c3843e06..107a0cbe52ac 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -54,6 +54,9 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
 	if (local->monitors || local->scanning)
 		new_flags |= FIF_BCN_PRBRESP_PROMISC;
 
+	if (local->fif_probe_req || local->probe_req_reg)
+		new_flags |= FIF_PROBE_REQ;
+
 	if (local->fif_fcsfail)
 		new_flags |= FIF_FCSFAIL;
 
@@ -99,16 +102,19 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 	int ret = 0;
 	int power;
 	enum nl80211_channel_type channel_type;
+	u32 offchannel_flag;
 
 	might_sleep();
 
 	scan_chan = local->scan_channel;
 
+	offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
 	if (scan_chan) {
 		chan = scan_chan;
 		channel_type = NL80211_CHAN_NO_HT;
 		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
-	} else if (local->tmp_channel) {
+	} else if (local->tmp_channel &&
+		   local->oper_channel != local->tmp_channel) {
 		chan = scan_chan = local->tmp_channel;
 		channel_type = local->tmp_channel_type;
 		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
@@ -117,8 +123,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 		channel_type = local->_oper_channel_type;
 		local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
 	}
+	offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
 
-	if (chan != local->hw.conf.channel ||
+	if (offchannel_flag || chan != local->hw.conf.channel ||
 	    channel_type != local->hw.conf.channel_type) {
 		local->hw.conf.channel = chan;
 		local->hw.conf.channel_type = channel_type;
@@ -197,6 +204,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 		sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid;
 	else if (sdata->vif.type == NL80211_IFTYPE_AP)
 		sdata->vif.bss_conf.bssid = sdata->vif.addr;
+	else if (sdata->vif.type == NL80211_IFTYPE_WDS)
+		sdata->vif.bss_conf.bssid = NULL;
 	else if (ieee80211_vif_is_mesh(&sdata->vif)) {
 		sdata->vif.bss_conf.bssid = zero;
 	} else {
@@ -207,6 +216,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_MESH_POINT:
 		break;
 	default:
@@ -291,7 +301,16 @@ static void ieee80211_restart_work(struct work_struct *work)
 	struct ieee80211_local *local =
 		container_of(work, struct ieee80211_local, restart_work);
 
+	/* wait for scan work complete */
+	flush_workqueue(local->workqueue);
+
+	mutex_lock(&local->mtx);
+	WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
+	     "%s called with hardware scan in progress\n", __func__);
+	mutex_unlock(&local->mtx);
+
 	rtnl_lock();
+	ieee80211_scan_cancel(local);
 	ieee80211_reconfig(local);
 	rtnl_unlock();
 }
@@ -302,7 +321,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
 
 	trace_api_restart_hw(local);
 
-	/* use this reason, __ieee80211_resume will unblock it */
+	/* use this reason, ieee80211_reconfig will unblock it */
 	ieee80211_stop_queues_by_reason(hw,
 		IEEE80211_QUEUE_STOP_REASON_SUSPEND);
 
@@ -316,7 +335,7 @@ static void ieee80211_recalc_smps_work(struct work_struct *work)
 		container_of(work, struct ieee80211_local, recalc_smps);
 
 	mutex_lock(&local->iflist_mtx);
-	ieee80211_recalc_smps(local, NULL);
+	ieee80211_recalc_smps(local);
 	mutex_unlock(&local->iflist_mtx);
 }
 
@@ -336,9 +355,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
 	struct ieee80211_if_managed *ifmgd;
 	int c = 0;
 
-	if (!netif_running(ndev))
-		return NOTIFY_DONE;
-
 	/* Make sure it's our interface that got changed */
 	if (!wdev)
 		return NOTIFY_DONE;
@@ -349,11 +365,14 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
 	sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
 	bss_conf = &sdata->vif.bss_conf;
 
+	if (!ieee80211_sdata_running(sdata))
+		return NOTIFY_DONE;
+
 	/* ARP filtering is only supported in managed mode */
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return NOTIFY_DONE;
 
-	idev = sdata->dev->ip_ptr;
+	idev = __in_dev_get_rtnl(sdata->dev);
 	if (!idev)
 		return NOTIFY_DONE;
 
@@ -390,6 +409,80 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
 }
 #endif
 
+static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct ieee80211_local *local =
+		container_of(napi, struct ieee80211_local, napi);
+
+	return local->ops->napi_poll(&local->hw, budget);
+}
+
+void ieee80211_napi_schedule(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	napi_schedule(&local->napi);
+}
+EXPORT_SYMBOL(ieee80211_napi_schedule);
+
+void ieee80211_napi_complete(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	napi_complete(&local->napi);
+}
+EXPORT_SYMBOL(ieee80211_napi_complete);
+
+/* There isn't a lot of sense in it, but you can transmit anything you like */
+static const struct ieee80211_txrx_stypes
+ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
+	[NL80211_IFTYPE_ADHOC] = {
+		.tx = 0xffff,
+		.rx = BIT(IEEE80211_STYPE_ACTION >> 4),
+	},
+	[NL80211_IFTYPE_STATION] = {
+		.tx = 0xffff,
+		.rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
+			BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
+	},
+	[NL80211_IFTYPE_AP] = {
+		.tx = 0xffff,
+		.rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
+			BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
+			BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
+			BIT(IEEE80211_STYPE_DISASSOC >> 4) |
+			BIT(IEEE80211_STYPE_AUTH >> 4) |
+			BIT(IEEE80211_STYPE_DEAUTH >> 4) |
+			BIT(IEEE80211_STYPE_ACTION >> 4),
+	},
+	[NL80211_IFTYPE_AP_VLAN] = {
+		/* copy AP */
+		.tx = 0xffff,
+		.rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
+			BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
+			BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
+			BIT(IEEE80211_STYPE_DISASSOC >> 4) |
+			BIT(IEEE80211_STYPE_AUTH >> 4) |
+			BIT(IEEE80211_STYPE_DEAUTH >> 4) |
+			BIT(IEEE80211_STYPE_ACTION >> 4),
+	},
+	[NL80211_IFTYPE_P2P_CLIENT] = {
+		.tx = 0xffff,
+		.rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
+			BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
+	},
+	[NL80211_IFTYPE_P2P_GO] = {
+		.tx = 0xffff,
+		.rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
+			BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
+			BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
+			BIT(IEEE80211_STYPE_DISASSOC >> 4) |
+			BIT(IEEE80211_STYPE_AUTH >> 4) |
+			BIT(IEEE80211_STYPE_DEAUTH >> 4) |
+			BIT(IEEE80211_STYPE_ACTION >> 4),
+	},
+};
+
 struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 					const struct ieee80211_ops *ops)
 {
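The .tx/.rx fields in ieee80211_default_mgmt_stypes are 16-bit bitmaps over management-frame subtypes: the subtype occupies bits 4-7 of the frame control field, so BIT(stype >> 4) maps each IEEE80211_STYPE_* constant to one of 16 bit positions. A small sketch of the membership test this encoding enables (mirrors how cfg80211 consults wiphy->mgmt_stypes, simplified):

    #include <linux/bitops.h>
    #include <linux/ieee80211.h>
    #include <net/cfg80211.h>

    /* True if RX registration for this management subtype is allowed. */
    static bool sketch_rx_stype_allowed(const struct ieee80211_txrx_stypes *st,
    				    __le16 frame_control)
    {
    	u16 stype = le16_to_cpu(frame_control) & IEEE80211_FCTL_STYPE;

    	return st->rx & BIT(stype >> 4);	/* subtype is bits 4-7 */
    }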
@@ -419,6 +512,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	if (!wiphy)
 		return NULL;
 
+	wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes;
+
 	wiphy->flags |= WIPHY_FLAG_NETNS_OK |
 			WIPHY_FLAG_4ADDR_AP |
 			WIPHY_FLAG_4ADDR_STATION;
@@ -444,6 +539,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	/* set up some defaults */
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
+	local->hw.max_report_rates = 0;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
 	local->user_power_level = -1;
@@ -455,7 +551,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	__hw_addr_init(&local->mc_list);
 
 	mutex_init(&local->iflist_mtx);
-	mutex_init(&local->scan_mtx);
+	mutex_init(&local->mtx);
 
 	mutex_init(&local->key_mtx);
 	spin_lock_init(&local->filter_lock);
@@ -494,6 +590,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	skb_queue_head_init(&local->skb_queue);
 	skb_queue_head_init(&local->skb_queue_unreliable);
 
+	/* init dummy netdev for use w/ NAPI */
+	init_dummy_netdev(&local->napi_dev);
+
 	return local_to_hw(local);
 }
 EXPORT_SYMBOL(ieee80211_alloc_hw);
@@ -506,6 +605,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	int channels, max_bitrates;
 	bool supp_ht;
 	static const u32 cipher_suites[] = {
+		/* keep WEP first, it may be removed below */
 		WLAN_CIPHER_SUITE_WEP40,
 		WLAN_CIPHER_SUITE_WEP104,
 		WLAN_CIPHER_SUITE_TKIP,
@@ -515,6 +615,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		WLAN_CIPHER_SUITE_AES_CMAC
 	};
 
+	if (hw->max_report_rates == 0)
+		hw->max_report_rates = hw->max_rates;
+
 	/*
 	 * generic code guarantees at least one band,
 	 * set this very early because much code assumes
@@ -554,6 +657,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	/* mac80211 always supports monitor */
 	local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
 
+#ifndef CONFIG_MAC80211_MESH
+	/* mesh depends on Kconfig, but drivers should set it if they want */
+	local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT);
+#endif
+
+	/* mac80211 supports control port protocol changing */
+	local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
+
 	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
 		local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
 	else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
@@ -566,10 +677,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	/*
 	 * Calculate scan IE length -- we need this to alloc
 	 * memory and to subtract from the driver limit. It
-	 * includes the (extended) supported rates and HT
+	 * includes the DS Params, (extended) supported rates, and HT
 	 * information -- SSID is the driver's responsibility.
 	 */
-	local->scan_ies_len = 4 + max_bitrates; /* (ext) supp rates */
+	local->scan_ies_len = 4 + max_bitrates /* (ext) supp rates */ +
+		3 /* DS Params */;
 	if (supp_ht)
 		local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap);
 
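On the scan_ies_len arithmetic above: a Supported Rates element carries at most eight rates, so max_bitrates rates cost a 2-byte Supported Rates header plus a 2-byte Extended Supported Rates header plus one byte per rate, i.e. 4 + max_bitrates bytes in total (for example, with 12 rates: 2 + 8 and 2 + 4 = 16 bytes); the DS Parameter Set element adds 2 header bytes plus a 1-byte channel number, hence the extra 3.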
@@ -589,10 +701,41 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (local->hw.wiphy->max_scan_ie_len)
 		local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
 
-	local->hw.wiphy->cipher_suites = cipher_suites;
-	local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
-	if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE))
-		local->hw.wiphy->n_cipher_suites--;
+	/* Set up cipher suites unless driver already did */
+	if (!local->hw.wiphy->cipher_suites) {
+		local->hw.wiphy->cipher_suites = cipher_suites;
+		local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
+		if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE))
+			local->hw.wiphy->n_cipher_suites--;
+	}
+	if (IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)) {
+		if (local->hw.wiphy->cipher_suites == cipher_suites) {
+			local->hw.wiphy->cipher_suites += 2;
+			local->hw.wiphy->n_cipher_suites -= 2;
+		} else {
+			u32 *suites;
+			int r, w = 0;
+
+			/* Filter out WEP */
+
+			suites = kmemdup(
+				local->hw.wiphy->cipher_suites,
+				sizeof(u32) * local->hw.wiphy->n_cipher_suites,
+				GFP_KERNEL);
+			if (!suites)
+				return -ENOMEM;
+			for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) {
+				u32 suite = local->hw.wiphy->cipher_suites[r];
+				if (suite == WLAN_CIPHER_SUITE_WEP40 ||
+				    suite == WLAN_CIPHER_SUITE_WEP104)
+					continue;
+				suites[w++] = suite;
+			}
+			local->hw.wiphy->cipher_suites = suites;
+			local->hw.wiphy->n_cipher_suites = w;
+			local->wiphy_ciphers_allocated = true;
+		}
+	}
 
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
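Two strategies appear in the hunk above: mac80211's own default array deliberately keeps WEP40/WEP104 in the first two slots, so when WEP setup failed they can be dropped with pointer arithmetic alone (+= 2 entries, -= 2 count); a driver-supplied array offers no ordering guarantee, so it is duplicated and compacted, with wiphy_ciphers_allocated recording that the copy must later be freed. A standalone userspace model of the copy-and-compact branch (suite selector values written out, since this sketch pulls in no kernel headers):

    #include <stdint.h>
    #include <stdlib.h>

    #define SUITE_WEP40  0x000fac01u	/* WLAN_CIPHER_SUITE_WEP40 */
    #define SUITE_WEP104 0x000fac05u	/* WLAN_CIPHER_SUITE_WEP104 */

    /* Duplicate 'in' and drop the WEP entries, as the kmemdup() branch does. */
    static uint32_t *filter_out_wep(const uint32_t *in, size_t n, size_t *out_n)
    {
    	uint32_t *out = malloc(n * sizeof(*out));
    	size_t r, w = 0;

    	if (!out)
    		return NULL;
    	for (r = 0; r < n; r++) {
    		if (in[r] == SUITE_WEP40 || in[r] == SUITE_WEP104)
    			continue;	/* skip WEP, keep everything else */
    		out[w++] = in[r];
    	}
    	*out_n = w;
    	return out;
    }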
@@ -606,7 +749,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		hw->queues = IEEE80211_MAX_QUEUES;
 
 	local->workqueue =
-		create_singlethread_workqueue(wiphy_name(local->hw.wiphy));
+		alloc_ordered_workqueue(wiphy_name(local->hw.wiphy), 0);
 	if (!local->workqueue) {
 		result = -ENOMEM;
 		goto fail_workqueue;
@@ -641,16 +784,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	result = ieee80211_wep_init(local);
 	if (result < 0)
-		printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n",
-		       wiphy_name(local->hw.wiphy), result);
+		wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
+			    result);
 
 	rtnl_lock();
 
 	result = ieee80211_init_rate_ctrl_alg(local,
 					      hw->rate_control_algorithm);
 	if (result < 0) {
-		printk(KERN_DEBUG "%s: Failed to initialize rate control "
-		       "algorithm\n", wiphy_name(local->hw.wiphy));
+		wiphy_debug(local->hw.wiphy,
+			    "Failed to initialize rate control algorithm\n");
 		goto fail_rate;
 	}
 
@@ -659,8 +802,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		result = ieee80211_if_add(local, "wlan%d", NULL,
 					  NL80211_IFTYPE_STATION, NULL);
 		if (result)
-			printk(KERN_WARNING "%s: Failed to add default virtual iface\n",
-			       wiphy_name(local->hw.wiphy));
+			wiphy_warn(local->hw.wiphy,
+				   "Failed to add default virtual iface\n");
 	}
 
 	rtnl_unlock();
@@ -683,6 +826,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		goto fail_ifa;
 #endif
 
+	netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
+		       local->hw.napi_weight);
+
 	return 0;
 
 #ifdef CONFIG_INET
@@ -703,6 +849,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
  fail_workqueue:
 	wiphy_unregister(local->hw.wiphy);
  fail_wiphy_register:
+	if (local->wiphy_ciphers_allocated)
+		kfree(local->hw.wiphy->cipher_suites);
 	kfree(local->int_scan_req);
 	return result;
 }
@@ -738,6 +886,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	 */
 	del_timer_sync(&local->work_timer);
 
+	cancel_work_sync(&local->restart_work);
 	cancel_work_sync(&local->reconfig_filter);
 
 	ieee80211_clear_tx_pending(local);
@@ -746,8 +895,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 
 	if (skb_queue_len(&local->skb_queue) ||
 	    skb_queue_len(&local->skb_queue_unreliable))
-		printk(KERN_WARNING "%s: skb_queue not empty\n",
-		       wiphy_name(local->hw.wiphy));
+		wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
 	skb_queue_purge(&local->skb_queue);
 	skb_queue_purge(&local->skb_queue_unreliable);
 
@@ -764,7 +912,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
 	struct ieee80211_local *local = hw_to_local(hw);
 
 	mutex_destroy(&local->iflist_mtx);
-	mutex_destroy(&local->scan_mtx);
+	mutex_destroy(&local->mtx);
+
+	if (local->wiphy_ciphers_allocated)
+		kfree(local->hw.wiphy->cipher_suites);
 
 	wiphy_free(local->hw.wiphy);
 }
@@ -812,12 +963,6 @@ static void __exit ieee80211_exit(void)
 	rc80211_minstrel_ht_exit();
 	rc80211_minstrel_exit();
 
-	/*
-	 * For key todo, it'll be empty by now but the work
-	 * might still be scheduled.
-	 */
-	flush_scheduled_work();
-
 	if (mesh_allocated)
 		ieee80211s_stop();
 
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index ebd3f1d9d889..58e741128968 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -115,7 +115,7 @@ struct mesh_path {
  * @hash_rnd: random value used for hash computations
  * @entries: number of entries in the table
  * @free_node: function to free nodes of the table
- * @copy_node: fuction to copy nodes of the table
+ * @copy_node: function to copy nodes of the table
  * @size_order: determines size of the table, there will be 2^size_order hash
  *	buckets
  * @mean_chain_len: maximum average length for the hash buckets' list, if it is
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index ea13a80a476c..1c91f0f3c307 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -412,7 +412,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 	enum plink_event event;
 	enum plink_frame_type ftype;
 	size_t baselen;
-	bool deactivated;
+	bool deactivated, matches_local = true;
 	u8 ie_len;
 	u8 *baseaddr;
 	__le16 plid, llid, reason;
@@ -487,6 +487,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 	/* Now we will figure out the appropriate event... */
 	event = PLINK_UNDEFINED;
 	if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) {
+		matches_local = false;
 		switch (ftype) {
 		case PLINK_OPEN:
 			event = OPN_RJCT;
@@ -498,7 +499,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			/* avoid warning */
 			break;
 		}
-		spin_lock_bh(&sta->lock);
+	}
+
+	if (!sta && !matches_local) {
+		rcu_read_unlock();
+		reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION);
+		llid = 0;
+		mesh_plink_frame_tx(sdata, PLINK_CLOSE, mgmt->sa, llid,
+				    plid, reason);
+		return;
 	} else if (!sta) {
 		/* ftype == PLINK_OPEN */
 		u32 rates;
@@ -522,7 +531,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 		}
 		event = OPN_ACPT;
 		spin_lock_bh(&sta->lock);
-	} else {
+	} else if (matches_local) {
 		spin_lock_bh(&sta->lock);
 		switch (ftype) {
 		case PLINK_OPEN:
@@ -564,6 +573,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			rcu_read_unlock();
 			return;
 		}
+	} else {
+		spin_lock_bh(&sta->lock);
 	}
 
 	mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n",
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b6c163ac22da..a3a9421555af 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -54,6 +54,12 @@
  */
 #define IEEE80211_SIGNAL_AVE_WEIGHT	3
 
+/*
+ * How many Beacon frames need to have been used in average signal strength
+ * before starting to indicate signal change events.
+ */
+#define IEEE80211_SIGNAL_AVE_MIN_COUNT	4
+
 #define TMR_RUNNING_TIMER	0
 #define TMR_RUNNING_CHANSW	1
 
@@ -86,7 +92,7 @@ enum rx_mgmt_action {
 /* utils */
 static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd)
 {
-	WARN_ON(!mutex_is_locked(&ifmgd->mtx));
+	lockdep_assert_held(&ifmgd->mtx);
 }
 
 /*
@@ -109,7 +115,7 @@ static void run_again(struct ieee80211_if_managed *ifmgd,
 	mod_timer(&ifmgd->timer, timeout);
 }
 
-static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata)
+void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
 {
 	if (sdata->local->hw.flags & IEEE80211_HW_BEACON_FILTER)
 		return;
@@ -118,6 +124,19 @@ static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata)
 		  round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME));
 }
 
+void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+		return;
+
+	mod_timer(&sdata->u.mgd.conn_mon_timer,
+		  round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
+
+	ifmgd->probe_send_count = 0;
+}
+
 static int ecw2cw(int ecw)
 {
 	return (1 << ecw) - 1;
@@ -778,16 +797,17 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 		params.uapsd = uapsd;
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-		printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d "
-		       "cWmin=%d cWmax=%d txop=%d uapsd=%d\n",
-		       wiphy_name(local->hw.wiphy), queue, aci, acm,
-		       params.aifs, params.cw_min, params.cw_max, params.txop,
-		       params.uapsd);
+		wiphy_debug(local->hw.wiphy,
+			    "WMM queue=%d aci=%d acm=%d aifs=%d "
+			    "cWmin=%d cWmax=%d txop=%d uapsd=%d\n",
+			    queue, aci, acm,
+			    params.aifs, params.cw_min, params.cw_max,
+			    params.txop, params.uapsd);
 #endif
 		if (drv_conf_tx(local, queue, &params))
-			printk(KERN_DEBUG "%s: failed to set TX queue "
-			       "parameters for queue %d\n",
-			       wiphy_name(local->hw.wiphy), queue);
+			wiphy_debug(local->hw.wiphy,
+				    "failed to set TX queue parameters for queue %d\n",
+				    queue);
 	}
 
 	/* enable WMM or activate new settings */
@@ -860,14 +880,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
 				IEEE80211_STA_BEACON_POLL);
 
-	/*
-	 * Always handle WMM once after association regardless
-	 * of the first value the AP uses. Setting -1 here has
-	 * that effect because the AP values is an unsigned
-	 * 4-bit value.
-	 */
-	sdata->u.mgd.wmm_last_param_set = -1;
-
 	ieee80211_led_assoc(local, 1);
 
 	if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
@@ -901,7 +913,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	mutex_lock(&local->iflist_mtx);
 	ieee80211_recalc_ps(local, -1);
-	ieee80211_recalc_smps(local, sdata);
+	ieee80211_recalc_smps(local);
 	mutex_unlock(&local->iflist_mtx);
 
 	netif_tx_start_all_queues(sdata->dev);
@@ -909,7 +921,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 }
 
 static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
-				   bool remove_sta)
+				   bool remove_sta, bool tx)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_local *local = sdata->local;
@@ -948,7 +960,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	sta = sta_info_get(sdata, bssid);
 	if (sta) {
 		set_sta_flags(sta, WLAN_STA_BLOCK_BA);
-		ieee80211_sta_tear_down_BA_sessions(sta);
+		ieee80211_sta_tear_down_BA_sessions(sta, tx);
 	}
 	mutex_unlock(&local->sta_mtx);
 
@@ -990,6 +1002,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	if (remove_sta)
 		sta_info_destroy_addr(sdata, bssid);
+
+	del_timer_sync(&sdata->u.mgd.conn_mon_timer);
+	del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
+	del_timer_sync(&sdata->u.mgd.timer);
+	del_timer_sync(&sdata->u.mgd.chswitch_timer);
 }
 
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
@@ -1006,21 +1023,26 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 	if (is_multicast_ether_addr(hdr->addr1))
 		return;
 
-	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
-		return;
-
-	mod_timer(&sdata->u.mgd.conn_mon_timer,
-		  round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
+	ieee80211_sta_reset_conn_monitor(sdata);
 }
 
 static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	const u8 *ssid;
+	u8 *dst = ifmgd->associated->bssid;
+	u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3);
+
+	/*
+	 * Try sending broadcast probe requests for the last three
+	 * probe requests after the first ones failed since some
+	 * buggy APs only support broadcast probe requests.
+	 */
+	if (ifmgd->probe_send_count >= unicast_limit)
+		dst = NULL;
 
 	ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
-	ieee80211_send_probe_req(sdata, ifmgd->associated->bssid,
-				 ssid + 2, ssid[1], NULL, 0);
+	ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
 
 	ifmgd->probe_send_count++;
 	ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT;
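Worked through, the unicast_limit arithmetic above (assuming IEEE80211_MAX_PROBE_TRIES is 5, its value in this tree): unicast_limit = max(1, 5 - 3) = 2, so probe_send_count values 0 and 1 produce unicast probe requests to the AP's BSSID, and counts 2, 3 and 4 fall back to broadcast (dst = NULL), giving exactly the "last three" broadcast attempts the comment promises.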
@@ -1102,9 +1124,12 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
 
 	printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
 
-	ieee80211_set_disassoc(sdata, true);
-	ieee80211_recalc_idle(local);
+	ieee80211_set_disassoc(sdata, true, true);
 	mutex_unlock(&ifmgd->mtx);
+
+	mutex_lock(&local->mtx);
+	ieee80211_recalc_idle(local);
+	mutex_unlock(&local->mtx);
 	/*
 	 * must be outside lock due to cfg80211,
 	 * but that's not a problem.
@@ -1172,8 +1197,10 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n",
 			sdata->name, bssid, reason_code);
 
-	ieee80211_set_disassoc(sdata, true);
+	ieee80211_set_disassoc(sdata, true, false);
+	mutex_lock(&sdata->local->mtx);
 	ieee80211_recalc_idle(sdata->local);
+	mutex_unlock(&sdata->local->mtx);
 
 	return RX_MGMT_CFG80211_DEAUTH;
 }
@@ -1202,8 +1229,10 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n",
 			sdata->name, mgmt->sa, reason_code);
 
-	ieee80211_set_disassoc(sdata, true);
+	ieee80211_set_disassoc(sdata, true, false);
+	mutex_lock(&sdata->local->mtx);
 	ieee80211_recalc_idle(sdata->local);
+	mutex_unlock(&sdata->local->mtx);
 	return RX_MGMT_CFG80211_DISASSOC;
 }
 
@@ -1262,7 +1291,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 
 	rates = 0;
 	basic_rates = 0;
-	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+	sband = local->hw.wiphy->bands[wk->chan->band];
 
 	for (i = 0; i < elems.supp_rates_len; i++) {
 		int rate = (elems.supp_rates[i] & 0x7f) * 5;
@@ -1298,11 +1327,11 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 		}
 	}
 
-	sta->sta.supp_rates[local->hw.conf.channel->band] = rates;
+	sta->sta.supp_rates[wk->chan->band] = rates;
 	sdata->vif.bss_conf.basic_rates = basic_rates;
 
 	/* cf. IEEE 802.11 9.2.12 */
-	if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
+	if (wk->chan->band == IEEE80211_BAND_2GHZ &&
 	    have_higher_than_11mbit)
 		sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
 	else
@@ -1330,6 +1359,14 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 		return false;
 	}
 
+	/*
+	 * Always handle WMM once after association regardless
+	 * of the first value the AP uses. Setting -1 here has
+	 * that effect because the AP values is an unsigned
+	 * 4-bit value.
+	 */
+	ifmgd->wmm_last_param_set = -1;
+
 	if (elems.wmm_param)
 		ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
 					 elems.wmm_param_len);
@@ -1362,7 +1399,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 	 * Also start the timer that will detect beacon loss.
 	 */
 	ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt);
-	mod_beacon_timer(sdata);
+	ieee80211_sta_reset_beacon_monitor(sdata);
 
 	return true;
 }
@@ -1465,7 +1502,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 	 * we have or will be receiving any beacons or data, so let's
 	 * schedule the timers again, just in case.
 	 */
-	mod_beacon_timer(sdata);
+	ieee80211_sta_reset_beacon_monitor(sdata);
 
 	mod_timer(&ifmgd->conn_mon_timer,
 		  round_jiffies_up(jiffies +
@@ -1540,15 +1577,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	ifmgd->last_beacon_signal = rx_status->signal;
 	if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
 		ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
-		ifmgd->ave_beacon_signal = rx_status->signal;
+		ifmgd->ave_beacon_signal = rx_status->signal * 16;
 		ifmgd->last_cqm_event_signal = 0;
+		ifmgd->count_beacon_signal = 1;
 	} else {
 		ifmgd->ave_beacon_signal =
 			(IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
 			 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
 			 ifmgd->ave_beacon_signal) / 16;
+		ifmgd->count_beacon_signal++;
 	}
 	if (bss_conf->cqm_rssi_thold &&
+	    ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
 	    !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
 		int sig = ifmgd->ave_beacon_signal / 16;
 		int last_event = ifmgd->last_cqm_event_signal;
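The averaging above is integer fixed-point: ave_beacon_signal holds the mean scaled by 16 (seeded with signal * 16), and each beacon applies ave = (3*sig*16 + 13*ave) / 16, an exponentially weighted moving average with weight 3/16 per sample; CQM events are additionally gated until IEEE80211_SIGNAL_AVE_MIN_COUNT beacons have contributed, so a single early reading cannot trigger a threshold event. A self-contained userspace model of the arithmetic:

    #include <stdio.h>

    #define AVE_WEIGHT 3	/* IEEE80211_SIGNAL_AVE_WEIGHT: 3/16 per sample */

    int main(void)
    {
    	int sig[] = { -60, -60, -70, -70, -70, -70 };	/* dBm per beacon */
    	int ave = sig[0] * 16;	/* seeded exactly as in the hunk above */
    	unsigned int i;

    	for (i = 1; i < sizeof(sig) / sizeof(sig[0]); i++) {
    		ave = (AVE_WEIGHT * sig[i] * 16 +
    		       (16 - AVE_WEIGHT) * ave) / 16;
    		/* /16 recovers dBm; the mean trails the -70 dBm input */
    		printf("beacon %u: average %d dBm\n", i, ave / 16);
    	}
    	return 0;
    }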
@@ -1588,7 +1628,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1588 * Push the beacon loss detection into the future since 1628 * Push the beacon loss detection into the future since
1589 * we are processing a beacon from the AP just now. 1629 * we are processing a beacon from the AP just now.
1590 */ 1630 */
1591 mod_beacon_timer(sdata); 1631 ieee80211_sta_reset_beacon_monitor(sdata);
1592 1632
1593 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); 1633 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
1594 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable, 1634 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable,
@@ -1599,7 +1639,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1599 directed_tim = ieee80211_check_tim(elems.tim, elems.tim_len, 1639 directed_tim = ieee80211_check_tim(elems.tim, elems.tim_len,
1600 ifmgd->aid); 1640 ifmgd->aid);
1601 1641
1602 if (ncrc != ifmgd->beacon_crc) { 1642 if (ncrc != ifmgd->beacon_crc || !ifmgd->beacon_crc_valid) {
1603 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, 1643 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems,
1604 true); 1644 true);
1605 1645
@@ -1630,9 +1670,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1630 } 1670 }
1631 } 1671 }
1632 1672
1633 if (ncrc == ifmgd->beacon_crc) 1673 if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid)
1634 return; 1674 return;
1635 ifmgd->beacon_crc = ncrc; 1675 ifmgd->beacon_crc = ncrc;
1676 ifmgd->beacon_crc_valid = true;
1636 1677
1637 if (elems.erp_info && elems.erp_info_len >= 1) { 1678 if (elems.erp_info && elems.erp_info_len >= 1) {
1638 erp_valid = true; 1679 erp_valid = true;
@@ -1751,7 +1792,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1751 struct ieee80211_local *local = sdata->local; 1792 struct ieee80211_local *local = sdata->local;
1752 struct ieee80211_work *wk; 1793 struct ieee80211_work *wk;
1753 1794
1754 mutex_lock(&local->work_mtx); 1795 mutex_lock(&local->mtx);
1755 list_for_each_entry(wk, &local->work_list, list) { 1796 list_for_each_entry(wk, &local->work_list, list) {
1756 if (wk->sdata != sdata) 1797 if (wk->sdata != sdata)
1757 continue; 1798 continue;
@@ -1783,7 +1824,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1783 free_work(wk); 1824 free_work(wk);
1784 break; 1825 break;
1785 } 1826 }
1786 mutex_unlock(&local->work_mtx); 1827 mutex_unlock(&local->mtx);
1787 1828
1788 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); 1829 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
1789 } 1830 }
@@ -1823,10 +1864,12 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1823 1864
1824 else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) { 1865 else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) {
1825#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1866#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1826 printk(KERN_DEBUG "No probe response from AP %pM" 1867 wiphy_debug(local->hw.wiphy,
1827 " after %dms, try %d\n", bssid, 1868 "%s: No probe response from AP %pM"
1828 (1000 * IEEE80211_PROBE_WAIT)/HZ, 1869 " after %dms, try %d\n",
1829 ifmgd->probe_send_count); 1870 sdata->name,
1871 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ,
1872 ifmgd->probe_send_count);
1830#endif 1873#endif
1831 ieee80211_mgd_probe_ap_send(sdata); 1874 ieee80211_mgd_probe_ap_send(sdata);
1832 } else { 1875 } else {
@@ -1836,12 +1879,16 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1836 */ 1879 */
1837 ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | 1880 ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
1838 IEEE80211_STA_BEACON_POLL); 1881 IEEE80211_STA_BEACON_POLL);
1839 printk(KERN_DEBUG "No probe response from AP %pM" 1882 wiphy_debug(local->hw.wiphy,
1840 " after %dms, disconnecting.\n", 1883 "%s: No probe response from AP %pM"
1841 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 1884 " after %dms, disconnecting.\n",
1842 ieee80211_set_disassoc(sdata, true); 1885 sdata->name,
1843 ieee80211_recalc_idle(local); 1886 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
1887 ieee80211_set_disassoc(sdata, true, true);
1844 mutex_unlock(&ifmgd->mtx); 1888 mutex_unlock(&ifmgd->mtx);
1889 mutex_lock(&local->mtx);
1890 ieee80211_recalc_idle(local);
1891 mutex_unlock(&local->mtx);
1845 /* 1892 /*
1846 * must be outside lock due to cfg80211, 1893 * must be outside lock due to cfg80211,
1847 * but that's not a problem. 1894 * but that's not a problem.
@@ -1917,6 +1964,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
1917 * time -- the code here is properly synchronised. 1964 * time -- the code here is properly synchronised.
1918 */ 1965 */
1919 1966
1967 cancel_work_sync(&ifmgd->request_smps_work);
1968
1920 cancel_work_sync(&ifmgd->beacon_connection_loss_work); 1969 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
1921 if (del_timer_sync(&ifmgd->timer)) 1970 if (del_timer_sync(&ifmgd->timer))
1922 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); 1971 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
@@ -1952,6 +2001,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1952 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); 2001 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1953 INIT_WORK(&ifmgd->beacon_connection_loss_work, 2002 INIT_WORK(&ifmgd->beacon_connection_loss_work,
1954 ieee80211_beacon_connection_loss_work); 2003 ieee80211_beacon_connection_loss_work);
2004 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work);
1955 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 2005 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
1956 (unsigned long) sdata); 2006 (unsigned long) sdata);
1957 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 2007 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -2158,7 +2208,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2158 } 2208 }
2159 2209
2160 /* Trying to reassociate - clear previous association state */ 2210 /* Trying to reassociate - clear previous association state */
2161 ieee80211_set_disassoc(sdata, true); 2211 ieee80211_set_disassoc(sdata, true, false);
2162 } 2212 }
2163 mutex_unlock(&ifmgd->mtx); 2213 mutex_unlock(&ifmgd->mtx);
2164 2214
@@ -2169,6 +2219,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2169 ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; 2219 ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N;
2170 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; 2220 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
2171 2221
2222 ifmgd->beacon_crc_valid = false;
2223
2172 for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) 2224 for (i = 0; i < req->crypto.n_ciphers_pairwise; i++)
2173 if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 || 2225 if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 ||
2174 req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP || 2226 req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
@@ -2249,6 +2301,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2249 else 2301 else
2250 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; 2302 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT;
2251 2303
2304 sdata->control_port_protocol = req->crypto.control_port_ethertype;
2305 sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt;
2306
2252 ieee80211_add_work(wk); 2307 ieee80211_add_work(wk);
2253 return 0; 2308 return 0;
2254} 2309}
@@ -2267,7 +2322,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2267 2322
2268 memcpy(bssid, req->bss->bssid, ETH_ALEN); 2323 memcpy(bssid, req->bss->bssid, ETH_ALEN);
2269 if (ifmgd->associated == req->bss) { 2324 if (ifmgd->associated == req->bss) {
2270 ieee80211_set_disassoc(sdata, false); 2325 ieee80211_set_disassoc(sdata, false, true);
2271 mutex_unlock(&ifmgd->mtx); 2326 mutex_unlock(&ifmgd->mtx);
2272 assoc_bss = true; 2327 assoc_bss = true;
2273 } else { 2328 } else {
@@ -2275,7 +2330,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2275 2330
2276 mutex_unlock(&ifmgd->mtx); 2331 mutex_unlock(&ifmgd->mtx);
2277 2332
2278 mutex_lock(&local->work_mtx); 2333 mutex_lock(&local->mtx);
2279 list_for_each_entry(wk, &local->work_list, list) { 2334 list_for_each_entry(wk, &local->work_list, list) {
2280 if (wk->sdata != sdata) 2335 if (wk->sdata != sdata)
2281 continue; 2336 continue;
@@ -2294,7 +2349,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2294 free_work(wk); 2349 free_work(wk);
2295 break; 2350 break;
2296 } 2351 }
2297 mutex_unlock(&local->work_mtx); 2352 mutex_unlock(&local->mtx);
2298 2353
2299 /* 2354 /*
2300 * If somebody requests authentication and we haven't 2355 * If somebody requests authentication and we haven't
@@ -2319,7 +2374,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2319 if (assoc_bss) 2374 if (assoc_bss)
2320 sta_info_destroy_addr(sdata, bssid); 2375 sta_info_destroy_addr(sdata, bssid);
2321 2376
2377 mutex_lock(&sdata->local->mtx);
2322 ieee80211_recalc_idle(sdata->local); 2378 ieee80211_recalc_idle(sdata->local);
2379 mutex_unlock(&sdata->local->mtx);
2323 2380
2324 return 0; 2381 return 0;
2325} 2382}
@@ -2348,7 +2405,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2348 sdata->name, req->bss->bssid, req->reason_code); 2405 sdata->name, req->bss->bssid, req->reason_code);
2349 2406
2350 memcpy(bssid, req->bss->bssid, ETH_ALEN); 2407 memcpy(bssid, req->bss->bssid, ETH_ALEN);
2351 ieee80211_set_disassoc(sdata, false); 2408 ieee80211_set_disassoc(sdata, false, true);
2352 2409
2353 mutex_unlock(&ifmgd->mtx); 2410 mutex_unlock(&ifmgd->mtx);
2354 2411
@@ -2357,7 +2414,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2357 cookie, !req->local_state_change); 2414 cookie, !req->local_state_change);
2358 sta_info_destroy_addr(sdata, bssid); 2415 sta_info_destroy_addr(sdata, bssid);
2359 2416
2417 mutex_lock(&sdata->local->mtx);
2360 ieee80211_recalc_idle(sdata->local); 2418 ieee80211_recalc_idle(sdata->local);
2419 mutex_unlock(&sdata->local->mtx);
2361 2420
2362 return 0; 2421 return 0;
2363} 2422}
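[The mlme.c hunks above fold the old work_mtx into the single local->mtx and take that same mutex around ieee80211_recalc_idle(). A minimal userspace sketch of the guard pattern, using a pthread mutex as a stand-in; all names below are illustrative, not the kernel API:]

/* Illustrative model of guarding a recalculation with one mutex. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct local {
	pthread_mutex_t mtx;     /* plays the role of local->mtx */
	int active_interfaces;   /* state the recalc reads */
	bool idle;               /* state the recalc writes */
};

/* Must be called with local->mtx held, like ieee80211_recalc_idle(). */
static void recalc_idle(struct local *l)
{
	l->idle = (l->active_interfaces == 0);
}

static void disconnect(struct local *l)
{
	/* ... tear down association state ... */
	pthread_mutex_lock(&l->mtx);
	l->active_interfaces--;
	recalc_idle(l);          /* readers and writers serialized by mtx */
	pthread_mutex_unlock(&l->mtx);
}

int main(void)
{
	struct local l = { PTHREAD_MUTEX_INITIALIZER, 1, false };
	disconnect(&l);
	printf("idle=%d\n", l.idle);
	return 0;
}

[Build with -pthread. The point of the change is that the recalculation and its inputs now share one lock instead of two.]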
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c36b1911987a..4b564091e51d 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -22,12 +22,16 @@
22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) 22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
23{ 23{
24 struct ieee80211_local *local = sdata->local; 24 struct ieee80211_local *local = sdata->local;
25 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
25 26
26 local->offchannel_ps_enabled = false; 27 local->offchannel_ps_enabled = false;
27 28
28 /* FIXME: what to do when local->pspolling is true? */ 29 /* FIXME: what to do when local->pspolling is true? */
29 30
30 del_timer_sync(&local->dynamic_ps_timer); 31 del_timer_sync(&local->dynamic_ps_timer);
32 del_timer_sync(&ifmgd->bcn_mon_timer);
33 del_timer_sync(&ifmgd->conn_mon_timer);
34
31 cancel_work_sync(&local->dynamic_ps_enable_work); 35 cancel_work_sync(&local->dynamic_ps_enable_work);
32 36
33 if (local->hw.conf.flags & IEEE80211_CONF_PS) { 37 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
@@ -85,6 +89,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
85 mod_timer(&local->dynamic_ps_timer, jiffies + 89 mod_timer(&local->dynamic_ps_timer, jiffies +
86 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 90 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
87 } 91 }
92
93 ieee80211_sta_reset_beacon_monitor(sdata);
94 ieee80211_sta_reset_conn_monitor(sdata);
88} 95}
89 96
90void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) 97void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
@@ -112,8 +119,10 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
112 * used from user space controlled off-channel operations. 119 * used from user space controlled off-channel operations.
113 */ 120 */
114 if (sdata->vif.type != NL80211_IFTYPE_STATION && 121 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
115 sdata->vif.type != NL80211_IFTYPE_MONITOR) 122 sdata->vif.type != NL80211_IFTYPE_MONITOR) {
123 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
116 netif_tx_stop_all_queues(sdata->dev); 124 netif_tx_stop_all_queues(sdata->dev);
125 }
117 } 126 }
118 mutex_unlock(&local->iflist_mtx); 127 mutex_unlock(&local->iflist_mtx);
119} 128}
@@ -131,6 +140,7 @@ void ieee80211_offchannel_stop_station(struct ieee80211_local *local)
131 continue; 140 continue;
132 141
133 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 142 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
143 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
134 netif_tx_stop_all_queues(sdata->dev); 144 netif_tx_stop_all_queues(sdata->dev);
135 if (sdata->u.mgd.associated) 145 if (sdata->u.mgd.associated)
136 ieee80211_offchannel_ps_enable(sdata); 146 ieee80211_offchannel_ps_enable(sdata);
@@ -155,8 +165,20 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
155 ieee80211_offchannel_ps_disable(sdata); 165 ieee80211_offchannel_ps_disable(sdata);
156 } 166 }
157 167
158 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) 168 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
169 clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
170 /*
171 * This may wake up queues even though the driver
172 * currently has them stopped. This is not very
173 * likely, since the driver won't have gotten any
174 * (or hardly any) new packets while we weren't
175 * on the right channel, and even if it happens
176 * it will at most lead to queueing up one more
177 * packet per queue in mac80211 rather than on
178 * the interface qdisc.
179 */
159 netif_tx_wake_all_queues(sdata->dev); 180 netif_tx_wake_all_queues(sdata->dev);
181 }
160 182
161 /* re-enable beaconing */ 183 /* re-enable beaconing */
162 if (enable_beaconing && 184 if (enable_beaconing &&
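[The offchannel.c hunks set SDATA_STATE_OFFCHANNEL before stopping the TX queues and clear it before waking them, so the TX path can refuse to transmit while the radio is away. A minimal sketch of that ordering with C11 atomics standing in for set_bit/clear_bit; the names are made up for illustration:]

/* Illustrative model of the off-channel state bit around queue stop/wake. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define STATE_OFFCHANNEL (1u << 0)

static atomic_uint state;

static bool tx_allowed(void)
{
	/* the TX path checks the flag, so frames are not sent while
	 * the hardware is tuned to another channel */
	return !(atomic_load(&state) & STATE_OFFCHANNEL);
}

static void go_offchannel(void)
{
	atomic_fetch_or(&state, STATE_OFFCHANNEL);   /* set first ... */
	/* ... then stop the queues, as in the hunks above */
}

static void return_onchannel(void)
{
	atomic_fetch_and(&state, ~STATE_OFFCHANNEL); /* clear first ... */
	/* ... then wake the queues; as the added comment notes, the
	 * worst case is one extra frame queued per queue */
}

int main(void)
{
	go_offchannel();
	printf("tx allowed: %d\n", tx_allowed());
	return_onchannel();
	printf("tx allowed: %d\n", tx_allowed());
	return 0;
}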
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d287fde0431d..e37355193ed1 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -45,7 +45,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
45 list_for_each_entry(sta, &local->sta_list, list) { 45 list_for_each_entry(sta, &local->sta_list, list) {
46 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { 46 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
47 set_sta_flags(sta, WLAN_STA_BLOCK_BA); 47 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
48 ieee80211_sta_tear_down_BA_sessions(sta); 48 ieee80211_sta_tear_down_BA_sessions(sta, true);
49 } 49 }
50 50
51 if (sta->uploaded) { 51 if (sta->uploaded) {
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index be04d46110fe..33f76993da08 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -145,6 +145,7 @@ static ssize_t rcname_read(struct file *file, char __user *userbuf,
145static const struct file_operations rcname_ops = { 145static const struct file_operations rcname_ops = {
146 .read = rcname_read, 146 .read = rcname_read,
147 .open = mac80211_open_file_generic, 147 .open = mac80211_open_file_generic,
148 .llseek = default_llseek,
148}; 149};
149#endif 150#endif
150 151
@@ -207,7 +208,7 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc)
207 208
208 fc = hdr->frame_control; 209 fc = hdr->frame_control;
209 210
210 return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); 211 return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc);
211} 212}
212 213
213static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) 214static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx)
@@ -328,6 +329,9 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
328 * if needed. 329 * if needed.
329 */ 330 */
330 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 331 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
332 /* Skip invalid rates */
333 if (info->control.rates[i].idx < 0)
334 break;
331 /* Rate masking supports only legacy rates for now */ 335 /* Rate masking supports only legacy rates for now */
332 if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) 336 if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS)
333 continue; 337 continue;
@@ -368,8 +372,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
368 372
369 ref = rate_control_alloc(name, local); 373 ref = rate_control_alloc(name, local);
370 if (!ref) { 374 if (!ref) {
371 printk(KERN_WARNING "%s: Failed to select rate control " 375 wiphy_warn(local->hw.wiphy,
372 "algorithm\n", wiphy_name(local->hw.wiphy)); 376 "Failed to select rate control algorithm\n");
373 return -ENOENT; 377 return -ENOENT;
374 } 378 }
375 379
@@ -380,9 +384,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
380 sta_info_flush(local, NULL); 384 sta_info_flush(local, NULL);
381 } 385 }
382 386
383 printk(KERN_DEBUG "%s: Selected rate control " 387 wiphy_debug(local->hw.wiphy, "Selected rate control algorithm '%s'\n",
384 "algorithm '%s'\n", wiphy_name(local->hw.wiphy), 388 ref->ops->name);
385 ref->ops->name);
386 389
387 return 0; 390 return 0;
388} 391}
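[The rate.c hunks replace printk + wiphy_name() pairs with wiphy_warn()/wiphy_debug(), which prepend the device name automatically. A userspace stand-in for that kind of device-prefixed logging macro (illustrative only; ## __VA_ARGS__ is a GNU C extension supported by GCC and Clang):]

/* Illustrative model of a device-prefixed logging helper. */
#include <stdio.h>

struct wiphy { const char *name; };

#define wiphy_warn(w, fmt, ...) \
	fprintf(stderr, "%s: " fmt, (w)->name, ##__VA_ARGS__)

int main(void)
{
	struct wiphy w = { .name = "phy0" };
	wiphy_warn(&w, "Failed to select rate control algorithm\n");
	wiphy_warn(&w, "Selected rate control algorithm '%s'\n", "minstrel_ht");
	return 0;
}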
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 241e76f3fdf2..a290ad231d77 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -122,6 +122,7 @@ static const struct file_operations minstrel_stat_fops = {
122 .open = minstrel_stats_open, 122 .open = minstrel_stats_open,
123 .read = minstrel_stats_read, 123 .read = minstrel_stats_read,
124 .release = minstrel_stats_release, 124 .release = minstrel_stats_release,
125 .llseek = default_llseek,
125}; 126};
126 127
127void 128void
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index c5b465904e3b..2a18d6602d4a 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -397,8 +397,9 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
397 !(info->flags & IEEE80211_TX_STAT_AMPDU)) 397 !(info->flags & IEEE80211_TX_STAT_AMPDU))
398 return; 398 return;
399 399
400 if (!info->status.ampdu_len) { 400 if (!(info->flags & IEEE80211_TX_STAT_AMPDU)) {
401 info->status.ampdu_ack_len = 1; 401 info->status.ampdu_ack_len =
402 (info->flags & IEEE80211_TX_STAT_ACK ? 1 : 0);
402 info->status.ampdu_len = 1; 403 info->status.ampdu_len = 1;
403 } 404 }
404 405
@@ -426,7 +427,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
426 group = minstrel_ht_get_group_idx(&ar[i]); 427 group = minstrel_ht_get_group_idx(&ar[i]);
427 rate = &mi->groups[group].rates[ar[i].idx % 8]; 428 rate = &mi->groups[group].rates[ar[i].idx % 8];
428 429
429 if (last && (info->flags & IEEE80211_TX_STAT_ACK)) 430 if (last)
430 rate->success += info->status.ampdu_ack_len; 431 rate->success += info->status.ampdu_ack_len;
431 432
432 rate->attempts += ar[i].count * info->status.ampdu_len; 433 rate->attempts += ar[i].count * info->status.ampdu_len;
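[The minstrel_ht hunk fixes the accounting for non-aggregated status reports: ampdu_len is forced to 1 and ampdu_ack_len is derived from the ACK flag, so the per-rate success counter no longer needs its own ACK test. A small self-contained model of the corrected bookkeeping (simplified flags and types, not the mac80211 structures):]

/* Illustrative model of the A-MPDU accounting fix. */
#include <stdio.h>

#define TX_STAT_ACK   (1u << 0)
#define TX_STAT_AMPDU (1u << 1)

struct tx_status { unsigned flags; int ampdu_len, ampdu_ack_len; };
struct rate_stats { int attempts, success; };

static void account(struct rate_stats *rs, struct tx_status *st, int tries)
{
	if (!(st->flags & TX_STAT_AMPDU)) {
		/* synthesize single-frame "aggregate" statistics */
		st->ampdu_ack_len = (st->flags & TX_STAT_ACK) ? 1 : 0;
		st->ampdu_len = 1;
	}
	rs->attempts += tries * st->ampdu_len;
	rs->success += st->ampdu_ack_len;   /* already 0 if unacked */
}

int main(void)
{
	struct rate_stats rs = { 0, 0 };
	struct tx_status acked = { TX_STAT_ACK, 0, 0 };
	struct tx_status lost  = { 0, 0, 0 };
	account(&rs, &acked, 1);
	account(&rs, &lost, 1);
	printf("attempts=%d success=%d\n", rs.attempts, rs.success);
	return 0;
}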
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 4a5a4b3e7799..cefcb5d2dae6 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -90,7 +90,7 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
90 MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); 90 MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
91 ms->len = p - ms->buf; 91 ms->len = p - ms->buf;
92 92
93 return 0; 93 return nonseekable_open(inode, file);
94} 94}
95 95
96static const struct file_operations minstrel_ht_stat_fops = { 96static const struct file_operations minstrel_ht_stat_fops = {
@@ -98,6 +98,7 @@ static const struct file_operations minstrel_ht_stat_fops = {
98 .open = minstrel_ht_stats_open, 98 .open = minstrel_ht_stats_open,
99 .read = minstrel_stats_read, 99 .read = minstrel_stats_read,
100 .release = minstrel_stats_release, 100 .release = minstrel_stats_release,
101 .llseek = no_llseek,
101}; 102};
102 103
103void 104void
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index 47438b4a9af5..4851e9e2daed 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -162,7 +162,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
162 file_info->next_entry = (file_info->next_entry + 1) % 162 file_info->next_entry = (file_info->next_entry + 1) %
163 RC_PID_EVENT_RING_SIZE; 163 RC_PID_EVENT_RING_SIZE;
164 164
165 /* Print information about the event. Note that userpace needs to 165 /* Print information about the event. Note that userspace needs to
166 * provide large enough buffers. */ 166 * provide large enough buffers. */
167 length = length < RC_PID_PRINT_BUF_SIZE ? 167 length = length < RC_PID_PRINT_BUF_SIZE ?
168 length : RC_PID_PRINT_BUF_SIZE; 168 length : RC_PID_PRINT_BUF_SIZE;
@@ -206,6 +206,7 @@ static const struct file_operations rc_pid_fop_events = {
206 .poll = rate_control_pid_events_poll, 206 .poll = rate_control_pid_events_poll,
207 .open = rate_control_pid_events_open, 207 .open = rate_control_pid_events_open,
208 .release = rate_control_pid_events_release, 208 .release = rate_control_pid_events_release,
209 .llseek = noop_llseek,
209}; 210};
210 211
211void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta, 212void rate_control_pid_add_sta_debugfs(void *priv, void *priv_sta,
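[The three debugfs hunks above each add an explicit .llseek handler (default_llseek, no_llseek paired with nonseekable_open, and noop_llseek), so every file states its seek behavior instead of leaving the slot NULL. A userspace sketch of why an explicit entry in a function-pointer ops table is clearer than an unspecified one; the names here are stand-ins, not the VFS API:]

/* Illustrative model of explicit seek behavior in an ops table. */
#include <stdio.h>

typedef long (*llseek_fn)(long pos, long off);

static long default_seek(long pos, long off) { return pos + off; }     /* seekable */
static long noop_seek(long pos, long off) { (void)off; return pos; }   /* accept, ignore */

struct file_ops {
	const char *name;
	llseek_fn llseek;   /* NULL would mean "behavior unspecified" */
};

int main(void)
{
	struct file_ops stats  = { "rc_stats",  default_seek };
	struct file_ops events = { "rc_events", noop_seek };
	printf("%s: 0+16 -> %ld\n", stats.name, stats.llseek(0, 16));
	printf("%s: 0+16 -> %ld\n", events.name, events.llseek(0, 16));
	return 0;
}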
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 28624282c5f3..902b03ee8f60 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -315,6 +315,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
315static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) 315static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
316{ 316{
317 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 317 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
318 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
318 int tid; 319 int tid;
319 320
320 /* does the frame have a qos control field? */ 321 /* does the frame have a qos control field? */
@@ -323,9 +324,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
323 /* frame has qos control */ 324 /* frame has qos control */
324 tid = *qc & IEEE80211_QOS_CTL_TID_MASK; 325 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
325 if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) 326 if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
326 rx->flags |= IEEE80211_RX_AMSDU; 327 status->rx_flags |= IEEE80211_RX_AMSDU;
327 else
328 rx->flags &= ~IEEE80211_RX_AMSDU;
329 } else { 328 } else {
330 /* 329 /*
331 * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): 330 * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"):
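[This hunk is the first of many in rx.c that move per-frame flags out of rx->flags and into status->rx_flags, i.e. into the skb's control buffer, so the flags travel with the frame through the reorder buffer instead of living in the shared rx_data. A minimal self-contained model of the control-buffer pattern (simplified stand-in structures, not the kernel's skb):]

/* Illustrative model of per-frame flags kept in the skb control buffer. */
#include <stdio.h>
#include <string.h>

#define RX_AMSDU (1u << 0)

struct rx_status { unsigned rx_flags; };   /* lives inside skb->cb */
struct sk_buff { unsigned char cb[48]; };

#define SKB_RXCB(skb) ((struct rx_status *)(skb)->cb)

int main(void)
{
	struct sk_buff skb;
	memset(skb.cb, 0, sizeof(skb.cb));

	SKB_RXCB(&skb)->rx_flags |= RX_AMSDU;  /* set while parsing QoS */

	/* ... the skb may sit in the reorder buffer for a while, then: */
	printf("A-MSDU: %d\n", !!(SKB_RXCB(&skb)->rx_flags & RX_AMSDU));
	return 0;
}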
@@ -387,26 +386,25 @@ static ieee80211_rx_result debug_noinline
387ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) 386ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
388{ 387{
389 struct ieee80211_local *local = rx->local; 388 struct ieee80211_local *local = rx->local;
389 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
390 struct sk_buff *skb = rx->skb; 390 struct sk_buff *skb = rx->skb;
391 391
392 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning))) 392 if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN)))
393 return RX_CONTINUE;
394
395 if (test_bit(SCAN_HW_SCANNING, &local->scanning))
393 return ieee80211_scan_rx(rx->sdata, skb); 396 return ieee80211_scan_rx(rx->sdata, skb);
394 397
395 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning) && 398 if (test_bit(SCAN_SW_SCANNING, &local->scanning)) {
396 (rx->flags & IEEE80211_RX_IN_SCAN))) {
397 /* drop all the other packets during a software scan anyway */ 399 /* drop all the other packets during a software scan anyway */
398 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) 400 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED)
399 dev_kfree_skb(skb); 401 dev_kfree_skb(skb);
400 return RX_QUEUED; 402 return RX_QUEUED;
401 } 403 }
402 404
403 if (unlikely(rx->flags & IEEE80211_RX_IN_SCAN)) { 405 /* scanning finished during invoking of handlers */
404 /* scanning finished during invoking of handlers */ 406 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan);
405 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); 407 return RX_DROP_UNUSABLE;
406 return RX_DROP_UNUSABLE;
407 }
408
409 return RX_CONTINUE;
410} 408}
411 409
412 410
@@ -538,20 +536,12 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
538 int index, 536 int index,
539 struct sk_buff_head *frames) 537 struct sk_buff_head *frames)
540{ 538{
541 struct ieee80211_supported_band *sband;
542 struct ieee80211_rate *rate = NULL;
543 struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; 539 struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
544 struct ieee80211_rx_status *status;
545 540
546 if (!skb) 541 if (!skb)
547 goto no_frame; 542 goto no_frame;
548 543
549 status = IEEE80211_SKB_RXCB(skb); 544 /* release the frame from the reorder ring buffer */
550
551 /* release the reordered frames to stack */
552 sband = hw->wiphy->bands[status->band];
553 if (!(status->flag & RX_FLAG_HT))
554 rate = &sband->bitrates[status->rate_idx];
555 tid_agg_rx->stored_mpdu_num--; 545 tid_agg_rx->stored_mpdu_num--;
556 tid_agg_rx->reorder_buf[index] = NULL; 546 tid_agg_rx->reorder_buf[index] = NULL;
557 __skb_queue_tail(frames, skb); 547 __skb_queue_tail(frames, skb);
@@ -580,9 +570,102 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
580 * frames that have not yet been received are assumed to be lost and the skb 570 * frames that have not yet been received are assumed to be lost and the skb
581 * can be released for processing. This may also release other skb's from the 571 * can be released for processing. This may also release other skb's from the
582 * reorder buffer if there are no additional gaps between the frames. 572 * reorder buffer if there are no additional gaps between the frames.
573 *
574 * Callers must hold tid_agg_rx->reorder_lock.
583 */ 575 */
584#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) 576#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10)
585 577
578static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
579 struct tid_ampdu_rx *tid_agg_rx,
580 struct sk_buff_head *frames)
581{
582 int index, j;
583
584 /* release the buffer until next missing frame */
585 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
586 tid_agg_rx->buf_size;
587 if (!tid_agg_rx->reorder_buf[index] &&
588 tid_agg_rx->stored_mpdu_num > 1) {
589 /*
590 * No buffers ready to be released, but check whether any
591 * frames in the reorder buffer have timed out.
592 */
593 int skipped = 1;
594 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
595 j = (j + 1) % tid_agg_rx->buf_size) {
596 if (!tid_agg_rx->reorder_buf[j]) {
597 skipped++;
598 continue;
599 }
600 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
601 HT_RX_REORDER_BUF_TIMEOUT))
602 goto set_release_timer;
603
604#ifdef CONFIG_MAC80211_HT_DEBUG
605 if (net_ratelimit())
606 wiphy_debug(hw->wiphy,
607 "release an RX reorder frame due to timeout on earlier frames\n");
608#endif
609 ieee80211_release_reorder_frame(hw, tid_agg_rx,
610 j, frames);
611
612 /*
613 * Increment the head seq# also for the skipped slots.
614 */
615 tid_agg_rx->head_seq_num =
616 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
617 skipped = 0;
618 }
619 } else while (tid_agg_rx->reorder_buf[index]) {
620 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
621 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
622 tid_agg_rx->buf_size;
623 }
624
625 /*
626 * Disable the reorder release timer for now.
627 *
628 * The current implementation lacks a proper locking scheme
629 * which would protect vital statistic and debug counters
630 * from being updated by two different but concurrent BHs.
631 *
632 * More information about the topic is available from:
633 * - thread: http://marc.info/?t=128635927000001
634 *
635 * What was wrong:
636 * => http://marc.info/?l=linux-wireless&m=128636170811964
637 * "Basically the thing is that until your patch, the data
638 * in the struct didn't actually need locking because it
639 * was accessed by the RX path only which is not concurrent."
640 *
641 * List of what needs to be fixed:
642 * => http://marc.info/?l=linux-wireless&m=128656352920957
643 *
644
645 if (tid_agg_rx->stored_mpdu_num) {
646 j = index = seq_sub(tid_agg_rx->head_seq_num,
647 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
648
649 for (; j != (index - 1) % tid_agg_rx->buf_size;
650 j = (j + 1) % tid_agg_rx->buf_size) {
651 if (tid_agg_rx->reorder_buf[j])
652 break;
653 }
654
655 set_release_timer:
656
657 mod_timer(&tid_agg_rx->reorder_timer,
658 tid_agg_rx->reorder_time[j] +
659 HT_RX_REORDER_BUF_TIMEOUT);
660 } else {
661 del_timer(&tid_agg_rx->reorder_timer);
662 }
663 */
664
665set_release_timer:
666 return;
667}
668
586/* 669/*
587 * As this function belongs to the RX path it must be under 670 * As this function belongs to the RX path it must be under
588 * rcu_read_lock protection. It returns false if the frame 671 * rcu_read_lock protection. It returns false if the frame
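[The new ieee80211_sta_reorder_release() above walks the reorder ring, releasing frames in order until it hits a gap, with 12-bit sequence arithmetic for the ring index. A small runnable model of that core loop, with the same seq_sub/seq_inc style arithmetic (simplified types; buffer size and sequence numbers chosen to show the 0xFFF wrap):]

/* Illustrative model of the in-order reorder-buffer release loop. */
#include <stdio.h>

#define SEQ_MASK 0xFFF
#define BUF_SIZE 8

static unsigned seq_sub(unsigned a, unsigned b) { return (a - b) & SEQ_MASK; }
static unsigned seq_inc(unsigned a) { return (a + 1) & SEQ_MASK; }

static const char *buf[BUF_SIZE];   /* reorder ring, NULL = missing */
static unsigned ssn = 0xFFD;        /* starting sequence number */
static unsigned head = 0xFFD;       /* next expected sequence number */

static void store(unsigned seq, const char *frame)
{
	buf[seq_sub(seq, ssn) % BUF_SIZE] = frame;
}

static void release_in_order(void)
{
	unsigned idx = seq_sub(head, ssn) % BUF_SIZE;
	while (buf[idx]) {
		printf("release seq %#x: %s\n", head, buf[idx]);
		buf[idx] = NULL;
		head = seq_inc(head);   /* note the 0xFFF -> 0 wrap */
		idx = seq_sub(head, ssn) % BUF_SIZE;
	}
}

int main(void)
{
	store(0xFFE, "B");
	store(0x000, "D");          /* sequence space has wrapped */
	release_in_order();         /* nothing: 0xFFD still missing */
	store(0xFFD, "A");
	store(0xFFF, "C");
	release_in_order();         /* releases A, B, C, D in order */
	return 0;
}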
@@ -598,14 +681,16 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
598 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; 681 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
599 u16 head_seq_num, buf_size; 682 u16 head_seq_num, buf_size;
600 int index; 683 int index;
684 bool ret = true;
601 685
602 buf_size = tid_agg_rx->buf_size; 686 buf_size = tid_agg_rx->buf_size;
603 head_seq_num = tid_agg_rx->head_seq_num; 687 head_seq_num = tid_agg_rx->head_seq_num;
604 688
689 spin_lock(&tid_agg_rx->reorder_lock);
605 /* frame with out of date sequence number */ 690 /* frame with out of date sequence number */
606 if (seq_less(mpdu_seq_num, head_seq_num)) { 691 if (seq_less(mpdu_seq_num, head_seq_num)) {
607 dev_kfree_skb(skb); 692 dev_kfree_skb(skb);
608 return true; 693 goto out;
609 } 694 }
610 695
611 /* 696 /*
@@ -626,7 +711,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
626 /* check if we already stored this frame */ 711 /* check if we already stored this frame */
627 if (tid_agg_rx->reorder_buf[index]) { 712 if (tid_agg_rx->reorder_buf[index]) {
628 dev_kfree_skb(skb); 713 dev_kfree_skb(skb);
629 return true; 714 goto out;
630 } 715 }
631 716
632 /* 717 /*
@@ -636,58 +721,19 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
636 if (mpdu_seq_num == tid_agg_rx->head_seq_num && 721 if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
637 tid_agg_rx->stored_mpdu_num == 0) { 722 tid_agg_rx->stored_mpdu_num == 0) {
638 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); 723 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
639 return false; 724 ret = false;
725 goto out;
640 } 726 }
641 727
642 /* put the frame in the reordering buffer */ 728 /* put the frame in the reordering buffer */
643 tid_agg_rx->reorder_buf[index] = skb; 729 tid_agg_rx->reorder_buf[index] = skb;
644 tid_agg_rx->reorder_time[index] = jiffies; 730 tid_agg_rx->reorder_time[index] = jiffies;
645 tid_agg_rx->stored_mpdu_num++; 731 tid_agg_rx->stored_mpdu_num++;
646 /* release the buffer until next missing frame */ 732 ieee80211_sta_reorder_release(hw, tid_agg_rx, frames);
647 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
648 tid_agg_rx->buf_size;
649 if (!tid_agg_rx->reorder_buf[index] &&
650 tid_agg_rx->stored_mpdu_num > 1) {
651 /*
652 * No buffers ready to be released, but check whether any
653 * frames in the reorder buffer have timed out.
654 */
655 int j;
656 int skipped = 1;
657 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
658 j = (j + 1) % tid_agg_rx->buf_size) {
659 if (!tid_agg_rx->reorder_buf[j]) {
660 skipped++;
661 continue;
662 }
663 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
664 HT_RX_REORDER_BUF_TIMEOUT))
665 break;
666 733
667#ifdef CONFIG_MAC80211_HT_DEBUG 734 out:
668 if (net_ratelimit()) 735 spin_unlock(&tid_agg_rx->reorder_lock);
669 printk(KERN_DEBUG "%s: release an RX reorder " 736 return ret;
670 "frame due to timeout on earlier "
671 "frames\n",
672 wiphy_name(hw->wiphy));
673#endif
674 ieee80211_release_reorder_frame(hw, tid_agg_rx,
675 j, frames);
676
677 /*
678 * Increment the head seq# also for the skipped slots.
679 */
680 tid_agg_rx->head_seq_num =
681 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
682 skipped = 0;
683 }
684 } else while (tid_agg_rx->reorder_buf[index]) {
685 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
686 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
687 tid_agg_rx->buf_size;
688 }
689
690 return true;
691} 737}
692 738
693/* 739/*
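[The hunk above converts every early return in ieee80211_sta_manage_reorder_buf() into "goto out" so the new reorder_lock is released on exactly one path. A minimal sketch of the single-exit locking pattern, using a pthread mutex as a stand-in for the spinlock; build with -pthread:]

/* Illustrative model of single-exit locking via goto out. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t reorder_lock = PTHREAD_MUTEX_INITIALIZER;

static bool manage_frame(int seq, int head)
{
	bool ret = true;    /* "frame consumed" by default */

	pthread_mutex_lock(&reorder_lock);

	if (seq < head)     /* out-of-date frame: drop it */
		goto out;

	if (seq == head) {  /* in order and nothing buffered: pass through */
		ret = false;
		goto out;
	}

	/* ... otherwise store the frame in the reorder buffer ... */

out:
	pthread_mutex_unlock(&reorder_lock);
	return ret;
}

int main(void)
{
	printf("consumed=%d\n", manage_frame(3, 5));  /* stale */
	printf("consumed=%d\n", manage_frame(5, 5));  /* in order */
	return 0;
}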
@@ -761,13 +807,14 @@ static ieee80211_rx_result debug_noinline
761ieee80211_rx_h_check(struct ieee80211_rx_data *rx) 807ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
762{ 808{
763 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 809 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
810 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
764 811
765 /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ 812 /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */
766 if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { 813 if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) {
767 if (unlikely(ieee80211_has_retry(hdr->frame_control) && 814 if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
768 rx->sta->last_seq_ctrl[rx->queue] == 815 rx->sta->last_seq_ctrl[rx->queue] ==
769 hdr->seq_ctrl)) { 816 hdr->seq_ctrl)) {
770 if (rx->flags & IEEE80211_RX_RA_MATCH) { 817 if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
771 rx->local->dot11FrameDuplicateCount++; 818 rx->local->dot11FrameDuplicateCount++;
772 rx->sta->num_duplicates++; 819 rx->sta->num_duplicates++;
773 } 820 }
@@ -796,11 +843,12 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
796 if (unlikely((ieee80211_is_data(hdr->frame_control) || 843 if (unlikely((ieee80211_is_data(hdr->frame_control) ||
797 ieee80211_is_pspoll(hdr->frame_control)) && 844 ieee80211_is_pspoll(hdr->frame_control)) &&
798 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && 845 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC &&
846 rx->sdata->vif.type != NL80211_IFTYPE_WDS &&
799 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { 847 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) {
800 if ((!ieee80211_has_fromds(hdr->frame_control) && 848 if ((!ieee80211_has_fromds(hdr->frame_control) &&
801 !ieee80211_has_tods(hdr->frame_control) && 849 !ieee80211_has_tods(hdr->frame_control) &&
802 ieee80211_is_data(hdr->frame_control)) || 850 ieee80211_is_data(hdr->frame_control)) ||
803 !(rx->flags & IEEE80211_RX_RA_MATCH)) { 851 !(status->rx_flags & IEEE80211_RX_RA_MATCH)) {
804 /* Drop IBSS frames and frames for other hosts 852 /* Drop IBSS frames and frames for other hosts
805 * silently. */ 853 * silently. */
806 return RX_DROP_MONITOR; 854 return RX_DROP_MONITOR;
@@ -822,7 +870,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
822 int keyidx; 870 int keyidx;
823 int hdrlen; 871 int hdrlen;
824 ieee80211_rx_result result = RX_DROP_UNUSABLE; 872 ieee80211_rx_result result = RX_DROP_UNUSABLE;
825 struct ieee80211_key *stakey = NULL; 873 struct ieee80211_key *sta_ptk = NULL;
826 int mmie_keyidx = -1; 874 int mmie_keyidx = -1;
827 __le16 fc; 875 __le16 fc;
828 876
@@ -857,22 +905,25 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
857 * No point in finding a key and decrypting if the frame is neither 905 * No point in finding a key and decrypting if the frame is neither
858 * addressed to us nor a multicast frame. 906 * addressed to us nor a multicast frame.
859 */ 907 */
860 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 908 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
861 return RX_CONTINUE; 909 return RX_CONTINUE;
862 910
863 /* start without a key */ 911 /* start without a key */
864 rx->key = NULL; 912 rx->key = NULL;
865 913
866 if (rx->sta) 914 if (rx->sta)
867 stakey = rcu_dereference(rx->sta->key); 915 sta_ptk = rcu_dereference(rx->sta->ptk);
868 916
869 fc = hdr->frame_control; 917 fc = hdr->frame_control;
870 918
871 if (!ieee80211_has_protected(fc)) 919 if (!ieee80211_has_protected(fc))
872 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); 920 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
873 921
874 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 922 if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
875 rx->key = stakey; 923 rx->key = sta_ptk;
924 if ((status->flag & RX_FLAG_DECRYPTED) &&
925 (status->flag & RX_FLAG_IV_STRIPPED))
926 return RX_CONTINUE;
876 /* Skip decryption if the frame is not protected. */ 927 /* Skip decryption if the frame is not protected. */
877 if (!ieee80211_has_protected(fc)) 928 if (!ieee80211_has_protected(fc))
878 return RX_CONTINUE; 929 return RX_CONTINUE;
@@ -885,7 +936,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
885 if (mmie_keyidx < NUM_DEFAULT_KEYS || 936 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
886 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) 937 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
887 return RX_DROP_MONITOR; /* unexpected BIP keyidx */ 938 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
888 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); 939 if (rx->sta)
940 rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
941 if (!rx->key)
942 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
889 } else if (!ieee80211_has_protected(fc)) { 943 } else if (!ieee80211_has_protected(fc)) {
890 /* 944 /*
891 * The frame was not protected, so skip decryption. However, we 945 * The frame was not protected, so skip decryption. However, we
@@ -928,16 +982,25 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
928 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); 982 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
929 keyidx = keyid >> 6; 983 keyidx = keyid >> 6;
930 984
931 rx->key = rcu_dereference(rx->sdata->keys[keyidx]); 985 /* check per-station GTK first, if multicast packet */
986 if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
987 rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
932 988
933 /* 989 /* if not found, try default key */
934 * RSNA-protected unicast frames should always be sent with 990 if (!rx->key) {
935 * pairwise or station-to-station keys, but for WEP we allow 991 rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
936 * using a key index as well. 992
937 */ 993 /*
938 if (rx->key && rx->key->conf.alg != ALG_WEP && 994 * RSNA-protected unicast frames should always be
939 !is_multicast_ether_addr(hdr->addr1)) 995 * sent with pairwise or station-to-station keys,
940 rx->key = NULL; 996 * but for WEP we allow using a key index as well.
997 */
998 if (rx->key &&
999 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
1000 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
1001 !is_multicast_ether_addr(hdr->addr1))
1002 rx->key = NULL;
1003 }
941 } 1004 }
942 1005
943 if (rx->key) { 1006 if (rx->key) {
@@ -951,8 +1014,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
951 return RX_DROP_UNUSABLE; 1014 return RX_DROP_UNUSABLE;
952 /* the hdr variable is invalid now! */ 1015 /* the hdr variable is invalid now! */
953 1016
954 switch (rx->key->conf.alg) { 1017 switch (rx->key->conf.cipher) {
955 case ALG_WEP: 1018 case WLAN_CIPHER_SUITE_WEP40:
1019 case WLAN_CIPHER_SUITE_WEP104:
956 /* Check for weak IVs if possible */ 1020 /* Check for weak IVs if possible */
957 if (rx->sta && ieee80211_is_data(fc) && 1021 if (rx->sta && ieee80211_is_data(fc) &&
958 (!(status->flag & RX_FLAG_IV_STRIPPED) || 1022 (!(status->flag & RX_FLAG_IV_STRIPPED) ||
@@ -962,15 +1026,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
962 1026
963 result = ieee80211_crypto_wep_decrypt(rx); 1027 result = ieee80211_crypto_wep_decrypt(rx);
964 break; 1028 break;
965 case ALG_TKIP: 1029 case WLAN_CIPHER_SUITE_TKIP:
966 result = ieee80211_crypto_tkip_decrypt(rx); 1030 result = ieee80211_crypto_tkip_decrypt(rx);
967 break; 1031 break;
968 case ALG_CCMP: 1032 case WLAN_CIPHER_SUITE_CCMP:
969 result = ieee80211_crypto_ccmp_decrypt(rx); 1033 result = ieee80211_crypto_ccmp_decrypt(rx);
970 break; 1034 break;
971 case ALG_AES_CMAC: 1035 case WLAN_CIPHER_SUITE_AES_CMAC:
972 result = ieee80211_crypto_aes_cmac_decrypt(rx); 1036 result = ieee80211_crypto_aes_cmac_decrypt(rx);
973 break; 1037 break;
1038 default:
1039 /*
1040 * We can reach here only with HW-only algorithms
1041 * but why didn't it decrypt the frame?!
1042 */
1043 return RX_DROP_UNUSABLE;
974 } 1044 }
975 1045
976 /* either the frame has been decrypted or will be dropped */ 1046 /* either the frame has been decrypted or will be dropped */
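[The decrypt hunks replace the old ALG_* enum with the IEEE 802.11 cipher suite selectors (OUI 00-0F-AC in the high bytes plus a type byte), and add a default case for hardware-only suites that failed to decrypt. A self-contained sketch of the resulting dispatch; the selector values are the standard ones, the rest is illustrative:]

/* Illustrative dispatch on 802.11 cipher-suite selectors. */
#include <stdio.h>

#define WLAN_CIPHER_SUITE_WEP40    0x000FAC01
#define WLAN_CIPHER_SUITE_TKIP     0x000FAC02
#define WLAN_CIPHER_SUITE_CCMP     0x000FAC04
#define WLAN_CIPHER_SUITE_WEP104   0x000FAC05
#define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06

static const char *decrypt(unsigned cipher)
{
	switch (cipher) {
	case WLAN_CIPHER_SUITE_WEP40:
	case WLAN_CIPHER_SUITE_WEP104:
		return "wep";        /* both key lengths share one path */
	case WLAN_CIPHER_SUITE_TKIP:
		return "tkip";
	case WLAN_CIPHER_SUITE_CCMP:
		return "ccmp";
	case WLAN_CIPHER_SUITE_AES_CMAC:
		return "aes_cmac";
	default:
		return "drop";       /* HW-only suite that did not decrypt */
	}
}

int main(void)
{
	printf("%s\n", decrypt(WLAN_CIPHER_SUITE_CCMP));
	printf("%s\n", decrypt(0x000FAC99));
	return 0;
}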
@@ -1079,7 +1149,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1079 sta->last_rx = jiffies; 1149 sta->last_rx = jiffies;
1080 } 1150 }
1081 1151
1082 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1152 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1083 return RX_CONTINUE; 1153 return RX_CONTINUE;
1084 1154
1085 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) 1155 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
@@ -1236,6 +1306,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1236 unsigned int frag, seq; 1306 unsigned int frag, seq;
1237 struct ieee80211_fragment_entry *entry; 1307 struct ieee80211_fragment_entry *entry;
1238 struct sk_buff *skb; 1308 struct sk_buff *skb;
1309 struct ieee80211_rx_status *status;
1239 1310
1240 hdr = (struct ieee80211_hdr *)rx->skb->data; 1311 hdr = (struct ieee80211_hdr *)rx->skb->data;
1241 fc = hdr->frame_control; 1312 fc = hdr->frame_control;
@@ -1265,7 +1336,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1265 /* This is the first fragment of a new frame. */ 1336 /* This is the first fragment of a new frame. */
1266 entry = ieee80211_reassemble_add(rx->sdata, frag, seq, 1337 entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
1267 rx->queue, &(rx->skb)); 1338 rx->queue, &(rx->skb));
1268 if (rx->key && rx->key->conf.alg == ALG_CCMP && 1339 if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP &&
1269 ieee80211_has_protected(fc)) { 1340 ieee80211_has_protected(fc)) {
1270 int queue = ieee80211_is_mgmt(fc) ? 1341 int queue = ieee80211_is_mgmt(fc) ?
1271 NUM_RX_DATA_QUEUES : rx->queue; 1342 NUM_RX_DATA_QUEUES : rx->queue;
@@ -1294,7 +1365,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1294 int i; 1365 int i;
1295 u8 pn[CCMP_PN_LEN], *rpn; 1366 u8 pn[CCMP_PN_LEN], *rpn;
1296 int queue; 1367 int queue;
1297 if (!rx->key || rx->key->conf.alg != ALG_CCMP) 1368 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP)
1298 return RX_DROP_UNUSABLE; 1369 return RX_DROP_UNUSABLE;
1299 memcpy(pn, entry->last_pn, CCMP_PN_LEN); 1370 memcpy(pn, entry->last_pn, CCMP_PN_LEN);
1300 for (i = CCMP_PN_LEN - 1; i >= 0; i--) { 1371 for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -1335,7 +1406,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1335 } 1406 }
1336 1407
1337 /* Complete frame has been reassembled - process it now */ 1408 /* Complete frame has been reassembled - process it now */
1338 rx->flags |= IEEE80211_RX_FRAGMENTED; 1409 status = IEEE80211_SKB_RXCB(rx->skb);
1410 status->rx_flags |= IEEE80211_RX_FRAGMENTED;
1339 1411
1340 out: 1412 out:
1341 if (rx->sta) 1413 if (rx->sta)
@@ -1352,9 +1424,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
1352{ 1424{
1353 struct ieee80211_sub_if_data *sdata = rx->sdata; 1425 struct ieee80211_sub_if_data *sdata = rx->sdata;
1354 __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; 1426 __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control;
1427 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1355 1428
1356 if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || 1429 if (likely(!rx->sta || !ieee80211_is_pspoll(fc) ||
1357 !(rx->flags & IEEE80211_RX_RA_MATCH))) 1430 !(status->rx_flags & IEEE80211_RX_RA_MATCH)))
1358 return RX_CONTINUE; 1431 return RX_CONTINUE;
1359 1432
1360 if ((sdata->vif.type != NL80211_IFTYPE_AP) && 1433 if ((sdata->vif.type != NL80211_IFTYPE_AP) &&
@@ -1492,7 +1565,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
1492 * Allow EAPOL frames to us/the PAE group address regardless 1565 * Allow EAPOL frames to us/the PAE group address regardless
1493 * of whether the frame was encrypted or not. 1566 * of whether the frame was encrypted or not.
1494 */ 1567 */
1495 if (ehdr->h_proto == htons(ETH_P_PAE) && 1568 if (ehdr->h_proto == rx->sdata->control_port_protocol &&
1496 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 || 1569 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 ||
1497 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0)) 1570 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
1498 return true; 1571 return true;
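[This hunk replaces the hardcoded ETH_P_PAE test with the per-interface control_port_protocol set from the association request earlier in the series, so protocols other than 802.1X EAPOL can act as the control port. A small runnable model of the configurable check (simplified structures; the EtherType value 0x888E for EAPOL is the standard one):]

/* Illustrative model of the configurable control-port EtherType check. */
#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ETH_P_PAE 0x888E   /* 802.1X EAPOL */

struct sdata { uint16_t control_port_protocol; };  /* network byte order */

static bool frame_allowed(const struct sdata *s, uint16_t h_proto)
{
	/* unencrypted frames pass only on the configured control port */
	return h_proto == s->control_port_protocol;
}

int main(void)
{
	struct sdata s = { .control_port_protocol = htons(ETH_P_PAE) };
	printf("EAPOL: %d\n", frame_allowed(&s, htons(0x888E)));
	printf("IPv4:  %d\n", frame_allowed(&s, htons(0x0800)));
	return 0;
}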
@@ -1515,6 +1588,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1515 struct sk_buff *skb, *xmit_skb; 1588 struct sk_buff *skb, *xmit_skb;
1516 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; 1589 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1517 struct sta_info *dsta; 1590 struct sta_info *dsta;
1591 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1518 1592
1519 skb = rx->skb; 1593 skb = rx->skb;
1520 xmit_skb = NULL; 1594 xmit_skb = NULL;
@@ -1522,7 +1596,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1522 if ((sdata->vif.type == NL80211_IFTYPE_AP || 1596 if ((sdata->vif.type == NL80211_IFTYPE_AP ||
1523 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && 1597 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
1524 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && 1598 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
1525 (rx->flags & IEEE80211_RX_RA_MATCH) && 1599 (status->rx_flags & IEEE80211_RX_RA_MATCH) &&
1526 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { 1600 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
1527 if (is_multicast_ether_addr(ehdr->h_dest)) { 1601 if (is_multicast_ether_addr(ehdr->h_dest)) {
1528 /* 1602 /*
@@ -1599,6 +1673,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1599 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 1673 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1600 __le16 fc = hdr->frame_control; 1674 __le16 fc = hdr->frame_control;
1601 struct sk_buff_head frame_list; 1675 struct sk_buff_head frame_list;
1676 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1602 1677
1603 if (unlikely(!ieee80211_is_data(fc))) 1678 if (unlikely(!ieee80211_is_data(fc)))
1604 return RX_CONTINUE; 1679 return RX_CONTINUE;
@@ -1606,7 +1681,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1606 if (unlikely(!ieee80211_is_data_present(fc))) 1681 if (unlikely(!ieee80211_is_data_present(fc)))
1607 return RX_DROP_MONITOR; 1682 return RX_DROP_MONITOR;
1608 1683
1609 if (!(rx->flags & IEEE80211_RX_AMSDU)) 1684 if (!(status->rx_flags & IEEE80211_RX_AMSDU))
1610 return RX_CONTINUE; 1685 return RX_CONTINUE;
1611 1686
1612 if (ieee80211_has_a4(hdr->frame_control) && 1687 if (ieee80211_has_a4(hdr->frame_control) &&
@@ -1657,6 +1732,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
1657 struct sk_buff *skb = rx->skb, *fwd_skb; 1732 struct sk_buff *skb = rx->skb, *fwd_skb;
1658 struct ieee80211_local *local = rx->local; 1733 struct ieee80211_local *local = rx->local;
1659 struct ieee80211_sub_if_data *sdata = rx->sdata; 1734 struct ieee80211_sub_if_data *sdata = rx->sdata;
1735 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1660 1736
1661 hdr = (struct ieee80211_hdr *) skb->data; 1737 hdr = (struct ieee80211_hdr *) skb->data;
1662 hdrlen = ieee80211_hdrlen(hdr->frame_control); 1738 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -1702,7 +1778,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
1702 1778
1703 mesh_hdr->ttl--; 1779 mesh_hdr->ttl--;
1704 1780
1705 if (rx->flags & IEEE80211_RX_RA_MATCH) { 1781 if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
1706 if (!mesh_hdr->ttl) 1782 if (!mesh_hdr->ttl)
1707 IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, 1783 IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh,
1708 dropped_frames_ttl); 1784 dropped_frames_ttl);
@@ -1909,13 +1985,38 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
1909} 1985}
1910 1986
1911static ieee80211_rx_result debug_noinline 1987static ieee80211_rx_result debug_noinline
1988ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
1989{
1990 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1991 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1992
1993 /*
1994 * From here on, look only at management frames.
1995 * Data and control frames are already handled,
1996 * and unknown (reserved) frames are useless.
1997 */
1998 if (rx->skb->len < 24)
1999 return RX_DROP_MONITOR;
2000
2001 if (!ieee80211_is_mgmt(mgmt->frame_control))
2002 return RX_DROP_MONITOR;
2003
2004 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
2005 return RX_DROP_MONITOR;
2006
2007 if (ieee80211_drop_unencrypted_mgmt(rx))
2008 return RX_DROP_UNUSABLE;
2009
2010 return RX_CONTINUE;
2011}
2012
2013static ieee80211_rx_result debug_noinline
1912ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 2014ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1913{ 2015{
1914 struct ieee80211_local *local = rx->local; 2016 struct ieee80211_local *local = rx->local;
1915 struct ieee80211_sub_if_data *sdata = rx->sdata; 2017 struct ieee80211_sub_if_data *sdata = rx->sdata;
1916 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; 2018 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1917 struct sk_buff *nskb; 2019 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1918 struct ieee80211_rx_status *status;
1919 int len = rx->skb->len; 2020 int len = rx->skb->len;
1920 2021
1921 if (!ieee80211_is_action(mgmt->frame_control)) 2022 if (!ieee80211_is_action(mgmt->frame_control))
@@ -1928,10 +2029,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1928 if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) 2029 if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC)
1929 return RX_DROP_UNUSABLE; 2030 return RX_DROP_UNUSABLE;
1930 2031
1931 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 2032 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1932 return RX_DROP_UNUSABLE;
1933
1934 if (ieee80211_drop_unencrypted_mgmt(rx))
1935 return RX_DROP_UNUSABLE; 2033 return RX_DROP_UNUSABLE;
1936 2034
1937 switch (mgmt->u.action.category) { 2035 switch (mgmt->u.action.category) {
@@ -2024,17 +2122,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2024 goto queue; 2122 goto queue;
2025 } 2123 }
2026 2124
2125 return RX_CONTINUE;
2126
2027 invalid: 2127 invalid:
2028 /* 2128 status->rx_flags |= IEEE80211_RX_MALFORMED_ACTION_FRM;
2029 * For AP mode, hostapd is responsible for handling any action 2129 /* will return in the next handlers */
2030 * frames that we didn't handle, including returning unknown 2130 return RX_CONTINUE;
2031 * ones. For all other modes we will return them to the sender, 2131
2032 * setting the 0x80 bit in the action category, as required by 2132 handled:
2033 * 802.11-2007 7.3.1.11. 2133 if (rx->sta)
2034 */ 2134 rx->sta->rx_packets++;
2035 if (sdata->vif.type == NL80211_IFTYPE_AP || 2135 dev_kfree_skb(rx->skb);
2036 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 2136 return RX_QUEUED;
2037 return RX_DROP_MONITOR; 2137
2138 queue:
2139 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2140 skb_queue_tail(&sdata->skb_queue, rx->skb);
2141 ieee80211_queue_work(&local->hw, &sdata->work);
2142 if (rx->sta)
2143 rx->sta->rx_packets++;
2144 return RX_QUEUED;
2145}
2146
2147static ieee80211_rx_result debug_noinline
2148ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2149{
2150 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
2151
2152 /* skip known-bad action frames and return them in the next handler */
2153 if (status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM)
2154 return RX_CONTINUE;
2038 2155
2039 /* 2156 /*
2040 * Getting here means the kernel doesn't know how to handle 2157 * Getting here means the kernel doesn't know how to handle
@@ -2042,12 +2159,46 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2042 * so userspace can register for those to know whether ones 2159 * so userspace can register for those to know whether ones
2043 * it transmitted were processed or returned. 2160 * it transmitted were processed or returned.
2044 */ 2161 */
2045 status = IEEE80211_SKB_RXCB(rx->skb);
2046 2162
2047 if (cfg80211_rx_action(rx->sdata->dev, status->freq, 2163 if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq,
2048 rx->skb->data, rx->skb->len, 2164 rx->skb->data, rx->skb->len,
2049 GFP_ATOMIC)) 2165 GFP_ATOMIC)) {
2050 goto handled; 2166 if (rx->sta)
2167 rx->sta->rx_packets++;
2168 dev_kfree_skb(rx->skb);
2169 return RX_QUEUED;
2170 }
2171
2172
2173 return RX_CONTINUE;
2174}
2175
2176static ieee80211_rx_result debug_noinline
2177ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
2178{
2179 struct ieee80211_local *local = rx->local;
2180 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
2181 struct sk_buff *nskb;
2182 struct ieee80211_sub_if_data *sdata = rx->sdata;
2183 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
2184
2185 if (!ieee80211_is_action(mgmt->frame_control))
2186 return RX_CONTINUE;
2187
2188 /*
2189 * For AP mode, hostapd is responsible for handling any action
2190 * frames that we didn't handle, including returning unknown
2191 * ones. For all other modes we will return them to the sender,
2192 * setting the 0x80 bit in the action category, as required by
2193 * 802.11-2007 7.3.1.11.
2194 * Newer versions of hostapd shall also use the management frame
2195 * registration mechanisms, but older ones still use cooked
2196 * monitor interfaces so push all frames there.
2197 */
2198 if (!(status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM) &&
2199 (sdata->vif.type == NL80211_IFTYPE_AP ||
2200 sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
2201 return RX_DROP_MONITOR;
2051 2202
2052 /* do not return rejected action frames */ 2203 /* do not return rejected action frames */
2053 if (mgmt->u.action.category & 0x80) 2204 if (mgmt->u.action.category & 0x80)
@@ -2066,20 +2217,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2066 2217
2067 ieee80211_tx_skb(rx->sdata, nskb); 2218 ieee80211_tx_skb(rx->sdata, nskb);
2068 } 2219 }
2069
2070 handled:
2071 if (rx->sta)
2072 rx->sta->rx_packets++;
2073 dev_kfree_skb(rx->skb); 2220 dev_kfree_skb(rx->skb);
2074 return RX_QUEUED; 2221 return RX_QUEUED;
2075
2076 queue:
2077 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2078 skb_queue_tail(&sdata->skb_queue, rx->skb);
2079 ieee80211_queue_work(&local->hw, &sdata->work);
2080 if (rx->sta)
2081 rx->sta->rx_packets++;
2082 return RX_QUEUED;
2083} 2222}
2084 2223
2085static ieee80211_rx_result debug_noinline 2224static ieee80211_rx_result debug_noinline
@@ -2090,15 +2229,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2090 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; 2229 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
2091 __le16 stype; 2230 __le16 stype;
2092 2231
2093 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
2094 return RX_DROP_MONITOR;
2095
2096 if (rx->skb->len < 24)
2097 return RX_DROP_MONITOR;
2098
2099 if (ieee80211_drop_unencrypted_mgmt(rx))
2100 return RX_DROP_UNUSABLE;
2101
2102 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); 2232 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb);
2103 if (rxs != RX_CONTINUE) 2233 if (rxs != RX_CONTINUE)
2104 return rxs; 2234 return rxs;
@@ -2199,6 +2329,14 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2199 struct net_device *prev_dev = NULL; 2329 struct net_device *prev_dev = NULL;
2200 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2330 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2201 2331
2332 /*
2333 * If cooked monitor has been processed already, then
2334 * don't do it again. If not, set the flag.
2335 */
2336 if (rx->flags & IEEE80211_RX_CMNTR)
2337 goto out_free_skb;
2338 rx->flags |= IEEE80211_RX_CMNTR;
2339
2202 if (skb_headroom(skb) < sizeof(*rthdr) && 2340 if (skb_headroom(skb) < sizeof(*rthdr) &&
2203 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) 2341 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC))
2204 goto out_free_skb; 2342 goto out_free_skb;
@@ -2253,29 +2391,53 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2253 if (prev_dev) { 2391 if (prev_dev) {
2254 skb->dev = prev_dev; 2392 skb->dev = prev_dev;
2255 netif_receive_skb(skb); 2393 netif_receive_skb(skb);
2256 skb = NULL; 2394 return;
2257 } else 2395 }
2258 goto out_free_skb;
2259
2260 return;
2261 2396
2262 out_free_skb: 2397 out_free_skb:
2263 dev_kfree_skb(skb); 2398 dev_kfree_skb(skb);
2264} 2399}
2265 2400
2401static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
2402 ieee80211_rx_result res)
2403{
2404 switch (res) {
2405 case RX_DROP_MONITOR:
2406 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2407 if (rx->sta)
2408 rx->sta->rx_dropped++;
2409 /* fall through */
2410 case RX_CONTINUE: {
2411 struct ieee80211_rate *rate = NULL;
2412 struct ieee80211_supported_band *sband;
2413 struct ieee80211_rx_status *status;
2414
2415 status = IEEE80211_SKB_RXCB((rx->skb));
2416
2417 sband = rx->local->hw.wiphy->bands[status->band];
2418 if (!(status->flag & RX_FLAG_HT))
2419 rate = &sband->bitrates[status->rate_idx];
2420
2421 ieee80211_rx_cooked_monitor(rx, rate);
2422 break;
2423 }
2424 case RX_DROP_UNUSABLE:
2425 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2426 if (rx->sta)
2427 rx->sta->rx_dropped++;
2428 dev_kfree_skb(rx->skb);
2429 break;
2430 case RX_QUEUED:
2431 I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued);
2432 break;
2433 }
2434}
2266 2435
2267static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, 2436static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
2268 struct ieee80211_rx_data *rx, 2437 struct sk_buff_head *frames)
2269 struct sk_buff *skb,
2270 struct ieee80211_rate *rate)
2271{ 2438{
2272 struct sk_buff_head reorder_release;
2273 ieee80211_rx_result res = RX_DROP_MONITOR; 2439 ieee80211_rx_result res = RX_DROP_MONITOR;
2274 2440 struct sk_buff *skb;
2275 __skb_queue_head_init(&reorder_release);
2276
2277 rx->skb = skb;
2278 rx->sdata = sdata;
2279 2441
2280#define CALL_RXH(rxh) \ 2442#define CALL_RXH(rxh) \
2281 do { \ 2443 do { \
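[The refactor above splits the handler chain into ieee80211_rx_handlers(), which runs every dequeued frame through the CALL_RXH chain, and ieee80211_rx_handlers_result(), which dispatches the first non-CONTINUE result exactly once. A compact model of that pipeline shape (toy handlers and result codes, not the mac80211 ones):]

/* Illustrative model of a result-code handler pipeline. */
#include <stdio.h>

typedef enum { RX_CONTINUE, RX_QUEUED, RX_DROP } rx_result;
typedef rx_result (*rx_handler)(int *frame);

static rx_result h_decrypt(int *f) { (void)f; return RX_CONTINUE; }
static rx_result h_drop_odd(int *f) { return (*f & 1) ? RX_DROP : RX_CONTINUE; }
static rx_result h_deliver(int *f) { printf("deliver %d\n", *f); return RX_QUEUED; }

static void handlers_result(rx_result res, int *f)
{
	if (res == RX_DROP)
		printf("drop %d\n", *f);   /* counters would be bumped here */
}

static void run_handlers(int *frames, int n)
{
	static rx_handler chain[] = { h_decrypt, h_drop_odd, h_deliver };
	for (int i = 0; i < n; i++) {
		rx_result res = RX_CONTINUE;
		for (unsigned j = 0; j < sizeof(chain) / sizeof(chain[0]); j++) {
			res = chain[j](&frames[i]);
			if (res != RX_CONTINUE)
				break;             /* like the rxh_next label */
		}
		handlers_result(res, &frames[i]);
	}
}

int main(void)
{
	int frames[] = { 2, 3, 4 };
	run_handlers(frames, 3);
	return 0;
}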
@@ -2284,23 +2446,14 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2284 goto rxh_next; \ 2446 goto rxh_next; \
2285 } while (0); 2447 } while (0);
2286 2448
2287 /* 2449 while ((skb = __skb_dequeue(frames))) {
2288 * NB: the rxh_next label works even if we jump
2289 * to it from here because then the list will
2290 * be empty, which is a trivial check
2291 */
2292 CALL_RXH(ieee80211_rx_h_passive_scan)
2293 CALL_RXH(ieee80211_rx_h_check)
2294
2295 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2296
2297 while ((skb = __skb_dequeue(&reorder_release))) {
2298 /* 2450 /*
2299 * all the other fields are valid across frames 2451 * all the other fields are valid across frames
2300 * that belong to an aMPDU since they are on the 2452 * that belong to an aMPDU since they are on the
2301 * same TID from the same station 2453 * same TID from the same station
2302 */ 2454 */
2303 rx->skb = skb; 2455 rx->skb = skb;
2456 rx->flags = 0;
2304 2457
2305 CALL_RXH(ieee80211_rx_h_decrypt) 2458 CALL_RXH(ieee80211_rx_h_decrypt)
2306 CALL_RXH(ieee80211_rx_h_check_more_data) 2459 CALL_RXH(ieee80211_rx_h_check_more_data)
@@ -2312,50 +2465,92 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2312 CALL_RXH(ieee80211_rx_h_remove_qos_control) 2465 CALL_RXH(ieee80211_rx_h_remove_qos_control)
2313 CALL_RXH(ieee80211_rx_h_amsdu) 2466 CALL_RXH(ieee80211_rx_h_amsdu)
2314#ifdef CONFIG_MAC80211_MESH 2467#ifdef CONFIG_MAC80211_MESH
2315 if (ieee80211_vif_is_mesh(&sdata->vif)) 2468 if (ieee80211_vif_is_mesh(&rx->sdata->vif))
2316 CALL_RXH(ieee80211_rx_h_mesh_fwding); 2469 CALL_RXH(ieee80211_rx_h_mesh_fwding);
2317#endif 2470#endif
2318 CALL_RXH(ieee80211_rx_h_data) 2471 CALL_RXH(ieee80211_rx_h_data)
2319 2472
2320 /* special treatment -- needs the queue */ 2473 /* special treatment -- needs the queue */
2321 res = ieee80211_rx_h_ctrl(rx, &reorder_release); 2474 res = ieee80211_rx_h_ctrl(rx, frames);
2322 if (res != RX_CONTINUE) 2475 if (res != RX_CONTINUE)
2323 goto rxh_next; 2476 goto rxh_next;
2324 2477
2478 CALL_RXH(ieee80211_rx_h_mgmt_check)
2325 CALL_RXH(ieee80211_rx_h_action) 2479 CALL_RXH(ieee80211_rx_h_action)
2480 CALL_RXH(ieee80211_rx_h_userspace_mgmt)
2481 CALL_RXH(ieee80211_rx_h_action_return)
2326 CALL_RXH(ieee80211_rx_h_mgmt) 2482 CALL_RXH(ieee80211_rx_h_mgmt)
2327 2483
2484 rxh_next:
2485 ieee80211_rx_handlers_result(rx, res);
2486
2328#undef CALL_RXH 2487#undef CALL_RXH
2488 }
2489}
2490
2491static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
2492{
2493 struct sk_buff_head reorder_release;
2494 ieee80211_rx_result res = RX_DROP_MONITOR;
2495
2496 __skb_queue_head_init(&reorder_release);
2497
2498#define CALL_RXH(rxh) \
2499 do { \
2500 res = rxh(rx); \
2501 if (res != RX_CONTINUE) \
2502 goto rxh_next; \
2503 } while (0);
2504
2505 CALL_RXH(ieee80211_rx_h_passive_scan)
2506 CALL_RXH(ieee80211_rx_h_check)
2507
2508 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2509
2510 ieee80211_rx_handlers(rx, &reorder_release);
2511 return;
2329 2512
2330 rxh_next: 2513 rxh_next:
2331 switch (res) { 2514 ieee80211_rx_handlers_result(rx, res);
2332 case RX_DROP_MONITOR: 2515
2333 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2516#undef CALL_RXH
2334 if (rx->sta) 2517}
2335 rx->sta->rx_dropped++; 2518
2336 /* fall through */ 2519/*
2337 case RX_CONTINUE: 2520 * This function makes calls into the RX path. Therefore the
2338 ieee80211_rx_cooked_monitor(rx, rate); 2521 * caller must hold the sta_info->lock and everything has to
2339 break; 2522 * be under rcu_read_lock protection as well.
2340 case RX_DROP_UNUSABLE: 2523 */
2341 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2524void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
2342 if (rx->sta) 2525{
2343 rx->sta->rx_dropped++; 2526 struct sk_buff_head frames;
2344 dev_kfree_skb(rx->skb); 2527 struct ieee80211_rx_data rx = {
2345 break; 2528 .sta = sta,
2346 case RX_QUEUED: 2529 .sdata = sta->sdata,
2347 I802_DEBUG_INC(sdata->local->rx_handlers_queued); 2530 .local = sta->local,
2348 break; 2531 .queue = tid,
2349 } 2532 };
2350 } 2533 struct tid_ampdu_rx *tid_agg_rx;
2534
2535 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
2536 if (!tid_agg_rx)
2537 return;
2538
2539 __skb_queue_head_init(&frames);
2540
2541 spin_lock(&tid_agg_rx->reorder_lock);
2542 ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames);
2543 spin_unlock(&tid_agg_rx->reorder_lock);
2544
2545 ieee80211_rx_handlers(&rx, &frames);
2351} 2546}
2352 2547
2353/* main receive path */ 2548/* main receive path */
2354 2549
2355static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, 2550static int prepare_for_handlers(struct ieee80211_rx_data *rx,
2356 struct ieee80211_rx_data *rx,
2357 struct ieee80211_hdr *hdr) 2551 struct ieee80211_hdr *hdr)
2358{ 2552{
2553 struct ieee80211_sub_if_data *sdata = rx->sdata;
2359 struct sk_buff *skb = rx->skb; 2554 struct sk_buff *skb = rx->skb;
2360 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2555 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2361 u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); 2556 u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
@@ -2369,7 +2564,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2369 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { 2564 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) {
2370 if (!(sdata->dev->flags & IFF_PROMISC)) 2565 if (!(sdata->dev->flags & IFF_PROMISC))
2371 return 0; 2566 return 0;
2372 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2567 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2373 } 2568 }
2374 break; 2569 break;
2375 case NL80211_IFTYPE_ADHOC: 2570 case NL80211_IFTYPE_ADHOC:
@@ -2379,15 +2574,15 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			return 1;
 		}
 		else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
-			if (!(rx->flags & IEEE80211_RX_IN_SCAN))
+			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN))
 				return 0;
-			rx->flags &= ~IEEE80211_RX_RA_MATCH;
+			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		} else if (!multicast &&
 			   compare_ether_addr(sdata->vif.addr,
 					      hdr->addr1) != 0) {
 			if (!(sdata->dev->flags & IFF_PROMISC))
 				return 0;
-			rx->flags &= ~IEEE80211_RX_RA_MATCH;
+			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		} else if (!rx->sta) {
 			int rate_idx;
 			if (status->flag & RX_FLAG_HT)
@@ -2405,7 +2600,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			if (!(sdata->dev->flags & IFF_PROMISC))
 				return 0;
 
-			rx->flags &= ~IEEE80211_RX_RA_MATCH;
+			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		}
 		break;
 	case NL80211_IFTYPE_AP_VLAN:
@@ -2416,9 +2611,9 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 			return 0;
 		} else if (!ieee80211_bssid_match(bssid,
 					sdata->vif.addr)) {
-			if (!(rx->flags & IEEE80211_RX_IN_SCAN))
+			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN))
 				return 0;
-			rx->flags &= ~IEEE80211_RX_RA_MATCH;
+			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		}
 		break;
 	case NL80211_IFTYPE_WDS:
@@ -2427,9 +2622,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 		if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2))
 			return 0;
 		break;
-	case NL80211_IFTYPE_MONITOR:
-	case NL80211_IFTYPE_UNSPECIFIED:
-	case __NL80211_IFTYPE_AFTER_LAST:
+	default:
 		/* should never get here */
 		WARN_ON(1);
 		break;
@@ -2439,12 +2632,56 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
 }
 
 /*
+ * This function returns whether or not the SKB
+ * was destined for RX processing or not, which,
+ * if consume is true, is equivalent to whether
+ * or not the skb was consumed.
+ */
+static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
+					    struct sk_buff *skb, bool consume)
+{
+	struct ieee80211_local *local = rx->local;
+	struct ieee80211_sub_if_data *sdata = rx->sdata;
+	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+	struct ieee80211_hdr *hdr = (void *)skb->data;
+	int prepares;
+
+	rx->skb = skb;
+	status->rx_flags |= IEEE80211_RX_RA_MATCH;
+	prepares = prepare_for_handlers(rx, hdr);
+
+	if (!prepares)
+		return false;
+
+	if (status->flag & RX_FLAG_MMIC_ERROR) {
+		if (status->rx_flags & IEEE80211_RX_RA_MATCH)
+			ieee80211_rx_michael_mic_report(hdr, rx);
+		return false;
+	}
+
+	if (!consume) {
+		skb = skb_copy(skb, GFP_ATOMIC);
+		if (!skb) {
+			if (net_ratelimit())
+				wiphy_debug(local->hw.wiphy,
+					"failed to copy multicast frame for %s\n",
+					sdata->name);
+			return true;
+		}
+
+		rx->skb = skb;
+	}
+
+	ieee80211_invoke_rx_handlers(rx);
+	return true;
+}
+
+/*
  * This is the actual Rx frames handler. as it blongs to Rx path it must
  * be called with rcu_read_lock protection.
  */
 static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
-					 struct sk_buff *skb,
-					 struct ieee80211_rate *rate)
+					 struct sk_buff *skb)
 {
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct ieee80211_local *local = hw_to_local(hw);
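
Annotation: ieee80211_prepare_and_rx_handle() folds the old prepare/MMIC-check/copy/invoke sequence into one helper whose consume argument decides whether the caller's skb is handed over or an internal skb_copy() is processed instead. A hedged sketch of the caller-side contract, with deliver() as a hypothetical stand-in for the helper:

/* Sketch: when handing one skb to N receivers, copy for the first N-1
 * and let the last receiver consume the original, saving one copy. */
static void fan_out_sketch(struct sk_buff *skb,
			   bool (*deliver)(struct sk_buff *, bool consume),
			   int n)
{
	int i;

	for (i = 0; i < n - 1; i++)
		deliver(skb, false);	/* deliver() copies internally */

	if (n > 0 && deliver(skb, true))
		return;			/* original skb was consumed */

	dev_kfree_skb(skb);		/* nobody wanted it */
}
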
@@ -2452,11 +2689,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	struct ieee80211_hdr *hdr;
 	__le16 fc;
 	struct ieee80211_rx_data rx;
-	int prepares;
-	struct ieee80211_sub_if_data *prev = NULL;
-	struct sk_buff *skb_new;
-	struct sta_info *sta, *tmp;
-	bool found_sta = false;
+	struct ieee80211_sub_if_data *prev;
+	struct sta_info *sta, *tmp, *prev_sta;
 	int err = 0;
 
 	fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
@@ -2469,7 +2703,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 
 	if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
 		     test_bit(SCAN_OFF_CHANNEL, &local->scanning)))
-		rx.flags |= IEEE80211_RX_IN_SCAN;
+		status->rx_flags |= IEEE80211_RX_IN_SCAN;
 
 	if (ieee80211_is_mgmt(fc))
 		err = skb_linearize(skb);
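
Annotation: rx flags such as IEEE80211_RX_RA_MATCH and IEEE80211_RX_IN_SCAN now live in the ieee80211_rx_status block inside skb->cb (reached via IEEE80211_SKB_RXCB) rather than in an on-stack rx.flags word, so per-frame state follows the skb itself. A simplified sketch of that storage scheme; the field names here are illustrative, not the exact mac80211 layout:

/* Sketch of per-frame flag storage in the skb control buffer. */
struct rx_status_sketch {
	u32 flag;	/* driver-provided receive flags */
	u32 rx_flags;	/* stack-internal per-frame flags */
};

#define RXCB_SKETCH(skb) ((struct rx_status_sketch *)(skb)->cb)

static void mark_ra_match_sketch(struct sk_buff *skb, bool match)
{
	if (match)
		RXCB_SKETCH(skb)->rx_flags |= BIT(0);	/* e.g. RA_MATCH */
	else
		RXCB_SKETCH(skb)->rx_flags &= ~BIT(0);
}
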
@@ -2486,91 +2720,67 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	ieee80211_verify_alignment(&rx);
 
 	if (ieee80211_is_data(fc)) {
+		prev_sta = NULL;
+
 		for_each_sta_info(local, hdr->addr2, sta, tmp) {
-			rx.sta = sta;
-			found_sta = true;
-			rx.sdata = sta->sdata;
-
-			rx.flags |= IEEE80211_RX_RA_MATCH;
-			prepares = prepare_for_handlers(rx.sdata, &rx, hdr);
-			if (prepares) {
-				if (status->flag & RX_FLAG_MMIC_ERROR) {
-					if (rx.flags & IEEE80211_RX_RA_MATCH)
-						ieee80211_rx_michael_mic_report(hdr, &rx);
-				} else
-					prev = rx.sdata;
-			}
-		}
-	}
-	if (!found_sta) {
-		list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-			if (!ieee80211_sdata_running(sdata))
+			if (!prev_sta) {
+				prev_sta = sta;
 				continue;
+			}
 
-			if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
-			    sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
-				continue;
+			rx.sta = prev_sta;
+			rx.sdata = prev_sta->sdata;
+			ieee80211_prepare_and_rx_handle(&rx, skb, false);
 
-			/*
-			 * frame is destined for this interface, but if it's
-			 * not also for the previous one we handle that after
-			 * the loop to avoid copying the SKB once too much
-			 */
+			prev_sta = sta;
+		}
 
-			if (!prev) {
-				prev = sdata;
-				continue;
-			}
+		if (prev_sta) {
+			rx.sta = prev_sta;
+			rx.sdata = prev_sta->sdata;
 
-			rx.sta = sta_info_get_bss(prev, hdr->addr2);
+			if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
+				return;
+		}
+	}
 
-			rx.flags |= IEEE80211_RX_RA_MATCH;
-			prepares = prepare_for_handlers(prev, &rx, hdr);
+	prev = NULL;
 
-			if (!prepares)
-				goto next;
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		if (!ieee80211_sdata_running(sdata))
+			continue;
 
-			if (status->flag & RX_FLAG_MMIC_ERROR) {
-				rx.sdata = prev;
-				if (rx.flags & IEEE80211_RX_RA_MATCH)
-					ieee80211_rx_michael_mic_report(hdr,
-									&rx);
-				goto next;
-			}
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
+		    sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+			continue;
 
-			/*
-			 * frame was destined for the previous interface
-			 * so invoke RX handlers for it
-			 */
+		/*
+		 * frame is destined for this interface, but if it's
+		 * not also for the previous one we handle that after
+		 * the loop to avoid copying the SKB once too much
+		 */
 
-			skb_new = skb_copy(skb, GFP_ATOMIC);
-			if (!skb_new) {
-				if (net_ratelimit())
-					printk(KERN_DEBUG "%s: failed to copy "
-					       "multicast frame for %s\n",
-					       wiphy_name(local->hw.wiphy),
-					       prev->name);
-				goto next;
-			}
-			ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate);
-next:
+		if (!prev) {
 			prev = sdata;
+			continue;
 		}
 
-		if (prev) {
-			rx.sta = sta_info_get_bss(prev, hdr->addr2);
+		rx.sta = sta_info_get_bss(prev, hdr->addr2);
+		rx.sdata = prev;
+		ieee80211_prepare_and_rx_handle(&rx, skb, false);
 
-			rx.flags |= IEEE80211_RX_RA_MATCH;
-			prepares = prepare_for_handlers(prev, &rx, hdr);
+		prev = sdata;
+	}
 
-			if (!prepares)
-				prev = NULL;
-		}
+	if (prev) {
+		rx.sta = sta_info_get_bss(prev, hdr->addr2);
+		rx.sdata = prev;
+
+		if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
+			return;
 	}
-	if (prev)
-		ieee80211_invoke_rx_handlers(prev, &rx, skb, rate);
-	else
-		dev_kfree_skb(skb);
+
+	dev_kfree_skb(skb);
 }
 
 /*
@@ -2611,30 +2821,41 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	if (WARN_ON(!local->started))
 		goto drop;
 
-	if (status->flag & RX_FLAG_HT) {
+	if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) {
 		/*
-		 * rate_idx is MCS index, which can be [0-76] as documented on:
-		 *
-		 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
-		 *
-		 * Anything else would be some sort of driver or hardware error.
-		 * The driver should catch hardware errors.
+		 * Validate the rate, unless a PLCP error means that
+		 * we probably can't have a valid rate here anyway.
 		 */
-		if (WARN((status->rate_idx < 0 ||
-			  status->rate_idx > 76),
-			 "Rate marked as an HT rate but passed "
-			 "status->rate_idx is not "
-			 "an MCS index [0-76]: %d (0x%02x)\n",
-			 status->rate_idx,
-			 status->rate_idx))
-			goto drop;
-	} else {
-		if (WARN_ON(status->rate_idx < 0 ||
-			    status->rate_idx >= sband->n_bitrates))
-			goto drop;
-		rate = &sband->bitrates[status->rate_idx];
+
+		if (status->flag & RX_FLAG_HT) {
+			/*
+			 * rate_idx is MCS index, which can be [0-76]
+			 * as documented on:
+			 *
+			 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
+			 *
+			 * Anything else would be some sort of driver or
+			 * hardware error. The driver should catch hardware
+			 * errors.
+			 */
+			if (WARN((status->rate_idx < 0 ||
+				  status->rate_idx > 76),
+				 "Rate marked as an HT rate but passed "
+				 "status->rate_idx is not "
+				 "an MCS index [0-76]: %d (0x%02x)\n",
+				 status->rate_idx,
+				 status->rate_idx))
+				goto drop;
+		} else {
+			if (WARN_ON(status->rate_idx < 0 ||
+				    status->rate_idx >= sband->n_bitrates))
+				goto drop;
+			rate = &sband->bitrates[status->rate_idx];
+		}
 	}
 
+	status->rx_flags = 0;
+
 	/*
 	 * key references and virtual interfaces are protected using RCU
 	 * and this requires that we are in a read-side RCU section during
@@ -2654,7 +2875,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
 		return;
 	}
 
-	__ieee80211_rx_handle_packet(hw, skb, rate);
+	__ieee80211_rx_handle_packet(hw, skb);
 
 	rcu_read_unlock();
 
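
Annotation: after this hunk, ieee80211_rx() validates status->rate_idx only when the PLCP checksum passed; a frame flagged with RX_FLAG_FAILED_PLCP_CRC may carry no meaningful rate, so the check is skipped rather than warned about. The decision logic reduces to roughly the following sketch (the flag bit is a stand-in):

/* Sketch: validate the reported rate only for frames with a good PLCP. */
static bool rate_is_plausible_sketch(u32 flags, int rate_idx,
				     int n_bitrates, bool is_ht)
{
	const u32 FAILED_PLCP_CRC = BIT(0);	/* stand-in flag bit */

	if (flags & FAILED_PLCP_CRC)
		return true;	/* nothing to validate */

	if (is_ht)
		return rate_idx >= 0 && rate_idx <= 76;	/* MCS 0-76 */

	return rate_idx >= 0 && rate_idx < n_bitrates;
}
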
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 872d7b6ef6b3..fb274db77e3c 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -242,20 +242,19 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
 	local->hw_scan_req->n_channels = n_chans;
 
 	ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie,
-					 req->ie, req->ie_len, band);
+					 req->ie, req->ie_len, band, (u32) -1,
+					 0);
 	local->hw_scan_req->ie_len = ielen;
 
 	return true;
 }
 
-void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
+static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
+				       bool was_hw_scan)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
-	bool was_hw_scan;
-
-	trace_api_scan_completed(local, aborted);
 
-	mutex_lock(&local->scan_mtx);
+	lockdep_assert_held(&local->mtx);
 
 	/*
 	 * It's ok to abort a not-yet-running scan (that
@@ -266,17 +265,13 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	if (WARN_ON(!local->scanning && !aborted))
 		aborted = true;
 
-	if (WARN_ON(!local->scan_req)) {
-		mutex_unlock(&local->scan_mtx);
-		return;
-	}
+	if (WARN_ON(!local->scan_req))
+		return false;
 
-	was_hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
 	if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
-		ieee80211_queue_delayed_work(&local->hw,
-					     &local->scan_work, 0);
-		mutex_unlock(&local->scan_mtx);
-		return;
+		int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req);
+		if (rc == 0)
+			return false;
 	}
 
 	kfree(local->hw_scan_req);
@@ -290,26 +285,42 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	local->scanning = 0;
 	local->scan_channel = NULL;
 
-	/* we only have to protect scan_req and hw/sw scan */
-	mutex_unlock(&local->scan_mtx);
-
-	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
-	if (was_hw_scan)
-		goto done;
-
-	ieee80211_configure_filter(local);
+	return true;
+}
 
-	drv_sw_scan_complete(local);
+static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw,
+					      bool was_hw_scan)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
 
-	ieee80211_offchannel_return(local, true);
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (!was_hw_scan) {
+		ieee80211_configure_filter(local);
+		drv_sw_scan_complete(local);
+		ieee80211_offchannel_return(local, true);
+	}
 
- done:
+	mutex_lock(&local->mtx);
 	ieee80211_recalc_idle(local);
+	mutex_unlock(&local->mtx);
+
 	ieee80211_mlme_notify_scan_completed(local);
 	ieee80211_ibss_notify_scan_completed(local);
 	ieee80211_mesh_notify_scan_completed(local);
 	ieee80211_queue_work(&local->hw, &local->work_work);
 }
+
+void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	trace_api_scan_completed(local, aborted);
+
+	set_bit(SCAN_COMPLETED, &local->scanning);
+	if (aborted)
+		set_bit(SCAN_ABORTED, &local->scanning);
+	ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+}
 EXPORT_SYMBOL(ieee80211_scan_completed);
 
 static int ieee80211_start_sw_scan(struct ieee80211_local *local)
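
Annotation: ieee80211_scan_completed() becomes callable from any driver context because it only records completion in the scanning bitfield and schedules scan_work; the actual teardown runs later in the worker under local->mtx. A generic sketch of this flag-plus-worker completion pattern:

#include <linux/bitops.h>
#include <linux/workqueue.h>

/* Sketch: completion callback that is cheap and context-agnostic. */
struct scan_state_sketch {
	unsigned long flags;		/* bit 0: completed, bit 1: aborted */
	struct delayed_work work;	/* does the heavy lifting later */
};

static void scan_completed_sketch(struct scan_state_sketch *ss, bool aborted)
{
	set_bit(0, &ss->flags);
	if (aborted)
		set_bit(1, &ss->flags);
	schedule_delayed_work(&ss->work, 0);	/* finish in process context */
}
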
@@ -353,6 +364,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	int rc;
 
+	lockdep_assert_held(&local->mtx);
+
 	if (local->scan_req)
 		return -EBUSY;
 
@@ -434,8 +447,8 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
 		return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
 }
 
-static int ieee80211_scan_state_decision(struct ieee80211_local *local,
-					 unsigned long *next_delay)
+static void ieee80211_scan_state_decision(struct ieee80211_local *local,
+					  unsigned long *next_delay)
 {
 	bool associated = false;
 	bool tx_empty = true;
@@ -445,12 +458,6 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_channel *next_chan;
 
-	/* if no more bands/channels left, complete scan and advance to the idle state */
-	if (local->scan_channel_idx >= local->scan_req->n_channels) {
-		ieee80211_scan_completed(&local->hw, false);
-		return 1;
-	}
-
 	/*
 	 * check if at least one STA interface is associated,
 	 * check if at least one STA interface has pending tx frames
@@ -522,7 +529,6 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
 	}
 
 	*next_delay = 0;
-	return 0;
 }
 
 static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local,
@@ -638,21 +644,18 @@ void ieee80211_scan_work(struct work_struct *work)
 		container_of(work, struct ieee80211_local, scan_work.work);
 	struct ieee80211_sub_if_data *sdata = local->scan_sdata;
 	unsigned long next_delay = 0;
+	bool aborted, hw_scan, finish;
 
-	mutex_lock(&local->scan_mtx);
-	if (!sdata || !local->scan_req) {
-		mutex_unlock(&local->scan_mtx);
-		return;
-	}
+	mutex_lock(&local->mtx);
 
-	if (local->hw_scan_req) {
-		int rc = drv_hw_scan(local, sdata, local->hw_scan_req);
-		mutex_unlock(&local->scan_mtx);
-		if (rc)
-			ieee80211_scan_completed(&local->hw, true);
-		return;
+	if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) {
+		aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning);
+		goto out_complete;
 	}
 
+	if (!sdata || !local->scan_req)
+		goto out;
+
 	if (local->scan_req && !local->scanning) {
 		struct cfg80211_scan_request *req = local->scan_req;
 		int rc;
@@ -661,21 +664,21 @@ void ieee80211_scan_work(struct work_struct *work)
 		local->scan_sdata = NULL;
 
 		rc = __ieee80211_start_scan(sdata, req);
-		mutex_unlock(&local->scan_mtx);
-
-		if (rc)
-			ieee80211_scan_completed(&local->hw, true);
-		return;
+		if (rc) {
+			/* need to complete scan in cfg80211 */
+			local->scan_req = req;
+			aborted = true;
+			goto out_complete;
+		} else
+			goto out;
 	}
 
-	mutex_unlock(&local->scan_mtx);
-
 	/*
 	 * Avoid re-scheduling when the sdata is going away.
 	 */
 	if (!ieee80211_sdata_running(sdata)) {
-		ieee80211_scan_completed(&local->hw, true);
-		return;
+		aborted = true;
+		goto out_complete;
 	}
 
 	/*
@@ -685,8 +688,12 @@ void ieee80211_scan_work(struct work_struct *work)
 	do {
 		switch (local->next_scan_state) {
 		case SCAN_DECISION:
-			if (ieee80211_scan_state_decision(local, &next_delay))
-				return;
+			/* if no more bands/channels left, complete scan */
+			if (local->scan_channel_idx >= local->scan_req->n_channels) {
+				aborted = false;
+				goto out_complete;
+			}
+			ieee80211_scan_state_decision(local, &next_delay);
 			break;
 		case SCAN_SET_CHANNEL:
 			ieee80211_scan_state_set_channel(local, &next_delay);
@@ -704,6 +711,19 @@ void ieee80211_scan_work(struct work_struct *work)
 	} while (next_delay == 0);
 
 	ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay);
+	mutex_unlock(&local->mtx);
+	return;
+
+out_complete:
+	hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
+	finish = __ieee80211_scan_completed(&local->hw, aborted, hw_scan);
+	mutex_unlock(&local->mtx);
+	if (finish)
+		__ieee80211_scan_completed_finish(&local->hw, hw_scan);
+	return;
+
+out:
+	mutex_unlock(&local->mtx);
 }
 
 int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
@@ -711,9 +731,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
 {
 	int res;
 
-	mutex_lock(&sdata->local->scan_mtx);
+	mutex_lock(&sdata->local->mtx);
 	res = __ieee80211_start_scan(sdata, req);
-	mutex_unlock(&sdata->local->scan_mtx);
+	mutex_unlock(&sdata->local->mtx);
 
 	return res;
 }
@@ -726,7 +746,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
 	int ret = -EBUSY;
 	enum ieee80211_band band;
 
-	mutex_lock(&local->scan_mtx);
+	mutex_lock(&local->mtx);
 
 	/* busy scanning */
 	if (local->scan_req)
@@ -761,25 +781,44 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
 
 	ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
  unlock:
-	mutex_unlock(&local->scan_mtx);
+	mutex_unlock(&local->mtx);
 	return ret;
 }
 
+/*
+ * Only call this function when a scan can't be queued -- under RTNL.
+ */
 void ieee80211_scan_cancel(struct ieee80211_local *local)
 {
 	bool abortscan;
-
-	cancel_delayed_work_sync(&local->scan_work);
+	bool finish = false;
 
 	/*
-	 * Only call this function when a scan can't be
-	 * queued -- mostly at suspend under RTNL.
+	 * We are only canceling software scan, or deferred scan that was not
+	 * yet really started (see __ieee80211_start_scan ).
+	 *
+	 * Regarding hardware scan:
+	 * - we can not call  __ieee80211_scan_completed() as when
+	 *   SCAN_HW_SCANNING bit is set this function change
+	 *   local->hw_scan_req to operate on 5G band, what race with
+	 *   driver which can use local->hw_scan_req
+	 *
+	 * - we can not cancel scan_work since driver can schedule it
+	 *   by ieee80211_scan_completed(..., true) to finish scan
+	 *
+	 * Hence low lever driver is responsible for canceling HW scan.
 	 */
-	mutex_lock(&local->scan_mtx);
-	abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
-		    (!local->scanning && local->scan_req);
-	mutex_unlock(&local->scan_mtx);
 
+	mutex_lock(&local->mtx);
+	abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning);
 	if (abortscan)
-		ieee80211_scan_completed(&local->hw, true);
+		finish = __ieee80211_scan_completed(&local->hw, true, false);
+	mutex_unlock(&local->mtx);
+
+	if (abortscan) {
+		/* The scan is canceled, but stop work from being pending */
+		cancel_delayed_work_sync(&local->scan_work);
+	}
+	if (finish)
+		__ieee80211_scan_completed_finish(&local->hw, false);
 }
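
Annotation: note the ordering in the rewritten ieee80211_scan_cancel(): completion state is updated under local->mtx, the mutex is dropped, and only then is scan_work flushed, since the work item itself takes local->mtx. A condensed sketch of that deadlock-avoiding order, with the two callbacks as placeholders:

/* Sketch: never call cancel_delayed_work_sync() while holding the lock
 * the work item itself takes, or the flush can deadlock. */
static void cancel_sketch(struct mutex *mtx, struct delayed_work *work,
			  bool (*complete_locked)(void),
			  void (*finish_unlocked)(void))
{
	bool finish;

	mutex_lock(mtx);
	finish = complete_locked();	/* mark the scan done under the lock */
	mutex_unlock(mtx);

	/* safe only after the lock is dropped: the work item takes mtx too */
	cancel_delayed_work_sync(work);

	if (finish)
		finish_unlocked();	/* notifications, filter reconfig, ... */
}
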
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6d86f0c1ad04..6d8f897d8763 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -125,7 +125,7 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
 					    lockdep_is_held(&local->sta_mtx));
 	while (sta) {
 		if ((sta->sdata == sdata ||
-		     sta->sdata->bss == sdata->bss) &&
+		     (sta->sdata->bss && sta->sdata->bss == sdata->bss)) &&
 		    memcmp(sta->sta.addr, addr, ETH_ALEN) == 0)
 			break;
 		sta = rcu_dereference_check(sta->hnext,
@@ -174,8 +174,7 @@ static void __sta_info_free(struct ieee80211_local *local,
 	}
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: Destroyed STA %pM\n",
-	       wiphy_name(local->hw.wiphy), sta->sta.addr);
+	wiphy_debug(local->hw.wiphy, "Destroyed STA %pM\n", sta->sta.addr);
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 	kfree(sta);
@@ -262,8 +261,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: Allocated STA %pM\n",
-	       wiphy_name(local->hw.wiphy), sta->sta.addr);
+	wiphy_debug(local->hw.wiphy, "Allocated STA %pM\n", sta->sta.addr);
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 #ifdef CONFIG_MAC80211_MESH
@@ -282,7 +280,7 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
 	unsigned long flags;
 	int err = 0;
 
-	WARN_ON(!mutex_is_locked(&local->sta_mtx));
+	lockdep_assert_held(&local->sta_mtx);
 
 	/* notify driver */
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -300,8 +298,9 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
 		sta->uploaded = true;
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (async)
-			printk(KERN_DEBUG "%s: Finished adding IBSS STA %pM\n",
-			       wiphy_name(local->hw.wiphy), sta->sta.addr);
+			wiphy_debug(local->hw.wiphy,
+				    "Finished adding IBSS STA %pM\n",
+				    sta->sta.addr);
 #endif
 	}
 
@@ -411,8 +410,8 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
 		spin_unlock_irqrestore(&local->sta_lock, flags);
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-		printk(KERN_DEBUG "%s: Added IBSS STA %pM\n",
-		       wiphy_name(local->hw.wiphy), sta->sta.addr);
+		wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n",
+			    sta->sta.addr);
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 		ieee80211_queue_work(&local->hw, &local->sta_finish_work);
@@ -459,8 +458,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
 	}
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: Inserted STA %pM\n",
-	       wiphy_name(local->hw.wiphy), sta->sta.addr);
+	wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr);
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 
 	/* move reference to rcu-protected */
@@ -618,7 +616,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	struct ieee80211_sub_if_data *sdata;
 	struct sk_buff *skb;
 	unsigned long flags;
-	int ret;
+	int ret, i;
 
 	might_sleep();
 
@@ -635,7 +633,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	 * will be sufficient.
 	 */
 	set_sta_flags(sta, WLAN_STA_BLOCK_BA);
-	ieee80211_sta_tear_down_BA_sessions(sta);
+	ieee80211_sta_tear_down_BA_sessions(sta, true);
 
 	spin_lock_irqsave(&local->sta_lock, flags);
 	ret = sta_info_hash_del(local, sta);
@@ -646,10 +644,10 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	if (ret)
 		return ret;
 
-	if (sta->key) {
-		ieee80211_key_free(local, sta->key);
-		WARN_ON(sta->key);
-	}
+	for (i = 0; i < NUM_DEFAULT_KEYS; i++)
+		ieee80211_key_free(local, sta->gtk[i]);
+	if (sta->ptk)
+		ieee80211_key_free(local, sta->ptk);
 
 	sta->dead = true;
 
@@ -690,8 +688,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 #endif
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-	printk(KERN_DEBUG "%s: Removed STA %pM\n",
-	       wiphy_name(local->hw.wiphy), sta->sta.addr);
+	wiphy_debug(local->hw.wiphy, "Removed STA %pM\n", sta->sta.addr);
 #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
 	cancel_work_sync(&sta->drv_unblock_wk);
 
@@ -841,13 +838,20 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 	mutex_unlock(&local->sta_mtx);
 }
 
-struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw,
-					       const u8 *addr)
+struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
+					       const u8 *addr,
+					       const u8 *localaddr)
 {
 	struct sta_info *sta, *nxt;
 
-	/* Just return a random station ... first in list ... */
+	/*
+	 * Just return a random station if localaddr is NULL
+	 * ... first in list.
+	 */
 	for_each_sta_info(hw_to_local(hw), addr, sta, nxt) {
+		if (localaddr &&
+		    compare_ether_addr(sta->sdata->vif.addr, localaddr) != 0)
+			continue;
 		if (!sta->uploaded)
 			return NULL;
 		return &sta->sta;
@@ -855,7 +859,7 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw,
 
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw);
+EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_ifaddr);
 
 struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
 					 const u8 *addr)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 54262e72376d..9265acadef32 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -79,6 +79,7 @@ enum ieee80211_sta_info_flags {
  * @dialog_token: dialog token for aggregation session
  * @state: session state (see above)
  * @stop_initiator: initiator of a session stop
+ * @tx_stop: TX DelBA frame when stopping
  *
  * This structure is protected by RCU and the per-station
  * spinlock. Assignments to the array holding it must hold
@@ -95,6 +96,7 @@ struct tid_ampdu_tx {
 	unsigned long state;
 	u8 dialog_token;
 	u8 stop_initiator;
+	bool tx_stop;
 };
 
 /**
@@ -103,6 +105,7 @@ struct tid_ampdu_tx {
  * @reorder_buf: buffer to reorder incoming aggregated MPDUs
  * @reorder_time: jiffies when skb was added
  * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
+ * @reorder_timer: releases expired frames from the reorder buffer.
  * @head_seq_num: head sequence number in reordering buffer.
  * @stored_mpdu_num: number of MPDUs in reordering buffer
  * @ssn: Starting Sequence Number expected to be aggregated.
@@ -110,20 +113,25 @@ struct tid_ampdu_tx {
  * @timeout: reset timer value (in TUs).
  * @dialog_token: dialog token for aggregation session
  * @rcu_head: RCU head used for freeing this struct
+ * @reorder_lock: serializes access to reorder buffer, see below.
  *
  * This structure is protected by RCU and the per-station
  * spinlock. Assignments to the array holding it must hold
- * the spinlock, only the RX path can access it under RCU
- * lock-free. The RX path, since it is single-threaded,
- * can even modify the structure without locking since the
- * only other modifications to it are done when the struct
- * can not yet or no longer be found by the RX path.
+ * the spinlock.
+ *
+ * The @reorder_lock is used to protect the variables and
+ * arrays such as @reorder_buf, @reorder_time, @head_seq_num,
+ * @stored_mpdu_num and @reorder_time from being corrupted by
+ * concurrent access of the RX path and the expired frame
+ * release timer.
  */
 struct tid_ampdu_rx {
 	struct rcu_head rcu_head;
+	spinlock_t reorder_lock;
 	struct sk_buff **reorder_buf;
 	unsigned long *reorder_time;
 	struct timer_list session_timer;
+	struct timer_list reorder_timer;
 	u16 head_seq_num;
 	u16 stored_mpdu_num;
 	u16 ssn;
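
Annotation: @reorder_lock is needed because the RX path is no longer the sole writer of the reorder buffer; the new @reorder_timer can release expired frames concurrently. Any update of the protected fields should look roughly like this sketch:

/* Sketch: both writers of the reorder state take the same spinlock. */
static void rx_path_store_sketch(struct tid_ampdu_rx *tid_rx,
				 int index, struct sk_buff *skb)
{
	spin_lock(&tid_rx->reorder_lock);
	tid_rx->reorder_buf[index] = skb;
	tid_rx->reorder_time[index] = jiffies;
	tid_rx->stored_mpdu_num++;
	spin_unlock(&tid_rx->reorder_lock);
}
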
@@ -191,7 +199,8 @@ enum plink_state {
  * @hnext: hash table linked list pointer
  * @local: pointer to the global information
  * @sdata: virtual interface this station belongs to
- * @key: peer key negotiated with this station, if any
+ * @ptk: peer key negotiated with this station, if any
+ * @gtk: group keys negotiated with this station, if any
  * @rate_ctrl: rate control algorithm reference
  * @rate_ctrl_priv: rate control private per-STA pointer
  * @last_tx_rate: rate used for last transmit, to report to userspace as
@@ -246,7 +255,8 @@ struct sta_info {
 	struct sta_info *hnext;
 	struct ieee80211_local *local;
 	struct ieee80211_sub_if_data *sdata;
-	struct ieee80211_key *key;
+	struct ieee80211_key *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
+	struct ieee80211_key *ptk;
 	struct rate_control_ref *rate_ctrl;
 	void *rate_ctrl_priv;
 	spinlock_t lock;
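
Annotation: replacing the single sta->key pointer with @ptk plus a @gtk array mirrors the 802.11 key hierarchy, where unicast frames prefer the pairwise key and group keys are selected by key ID. A sketch of the resulting lookup order; default_key stands in for the per-interface fallback:

/* Sketch: pairwise key wins; group keys are indexed by key ID. */
static struct ieee80211_key *
select_key_sketch(struct sta_info *sta, int keyid,
		  struct ieee80211_key *default_key)
{
	struct ieee80211_key *key;

	key = rcu_dereference(sta->ptk);	/* unicast: pairwise key */
	if (key)
		return key;

	key = rcu_dereference(sta->gtk[keyid]);	/* fall back to group key */
	if (key)
		return key;

	return default_key;			/* interface-wide default */
}
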
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 34da67995d94..3153c19893b8 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -58,6 +58,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	info->control.vif = &sta->sdata->vif;
 	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING |
 		       IEEE80211_TX_INTFL_RETRANSMISSION;
+	info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
 
 	sta->tx_filtered_count++;
 
@@ -114,11 +115,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	if (net_ratelimit())
-		printk(KERN_DEBUG "%s: dropped TX filtered frame, "
-		       "queue_len=%d PS=%d @%lu\n",
-		       wiphy_name(local->hw.wiphy),
-		       skb_queue_len(&sta->tx_filtered),
-		       !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
+		wiphy_debug(local->hw.wiphy,
+			    "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n",
+			    skb_queue_len(&sta->tx_filtered),
+			    !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
 #endif
 	dev_kfree_skb(skb);
 }
@@ -176,7 +176,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
 		/* the HW cannot have attempted that rate */
-		if (i >= hw->max_rates) {
+		if (i >= hw->max_report_rates) {
 			info->status.rates[i].idx = -1;
 			info->status.rates[i].count = 0;
 		} else if (info->status.rates[i].idx >= 0) {
@@ -296,7 +296,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	}
 
 	if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX)
-		cfg80211_action_tx_status(
+		cfg80211_mgmt_tx_status(
 			skb->dev, (unsigned long) skb, skb->data, skb->len,
 			!!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c54db966926b..96c594309506 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -273,6 +273,9 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
 	 */
 		return TX_DROP;
 
+	if (tx->sdata->vif.type == NL80211_IFTYPE_WDS)
+		return TX_CONTINUE;
+
 	if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
 		return TX_CONTINUE;
 
@@ -351,8 +354,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 
 	local->total_ps_buffered = total;
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
-	printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n",
-	       wiphy_name(local->hw.wiphy), purged);
+	wiphy_debug(local->hw.wiphy, "PS buffers full - purged %d frames\n",
+		    purged);
 #endif
 }
 
@@ -509,6 +512,18 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
 }
 
 static ieee80211_tx_result debug_noinline
+ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+
+	if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol &&
+		     tx->sdata->control_port_no_encrypt))
+		info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+
+	return TX_CONTINUE;
+}
+
+static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 {
 	struct ieee80211_key *key = NULL;
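
Annotation: ieee80211_tx_h_check_control_port_protocol generalizes the former hard-coded ETH_P_PAE tests; the control-port ethertype is per-interface configuration (EAPOL by default) and matching frames may be sent unprotected when control_port_no_encrypt is set. Stripped of the handler boilerplate, the check amounts to this sketch:

/* Sketch: flag control-port frames (e.g. EAPOL) for cleartext TX
 * when the interface was configured with control_port_no_encrypt. */
static void mark_control_port_sketch(struct ieee80211_tx_info *info,
				     __be16 proto, __be16 control_port_proto,
				     bool no_encrypt)
{
	if (proto == control_port_proto && no_encrypt)
		info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
}
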
@@ -517,7 +532,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 
 	if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT))
 		tx->key = NULL;
-	else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
+	else if (tx->sta && (key = rcu_dereference(tx->sta->ptk)))
 		tx->key = key;
 	else if (ieee80211_is_mgmt(hdr->frame_control) &&
 		 is_multicast_ether_addr(hdr->addr1) &&
@@ -527,7 +542,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
-		 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) &&
+		 (tx->skb->protocol != tx->sdata->control_port_protocol) &&
 		 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
 		 (!ieee80211_is_robust_mgmt_frame(hdr) ||
 		  (ieee80211_is_action(hdr->frame_control) &&
@@ -543,15 +558,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 		tx->key->tx_rx_count++;
 		/* TODO: add threshold stuff again */
 
-		switch (tx->key->conf.alg) {
-		case ALG_WEP:
+		switch (tx->key->conf.cipher) {
+		case WLAN_CIPHER_SUITE_WEP40:
+		case WLAN_CIPHER_SUITE_WEP104:
 			if (ieee80211_is_auth(hdr->frame_control))
 				break;
-		case ALG_TKIP:
+		case WLAN_CIPHER_SUITE_TKIP:
 			if (!ieee80211_is_data_present(hdr->frame_control))
 				tx->key = NULL;
 			break;
-		case ALG_CCMP:
+		case WLAN_CIPHER_SUITE_CCMP:
 			if (!ieee80211_is_data_present(hdr->frame_control) &&
 			    !ieee80211_use_mfp(hdr->frame_control, tx->sta,
 					       tx->skb))
@@ -561,7 +577,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 				   IEEE80211_KEY_FLAG_SW_MGMT) &&
 			    ieee80211_is_mgmt(hdr->frame_control);
 			break;
-		case ALG_AES_CMAC:
+		case WLAN_CIPHER_SUITE_AES_CMAC:
 			if (!ieee80211_is_mgmt(hdr->frame_control))
 				tx->key = NULL;
 			break;
@@ -946,22 +962,31 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
 {
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+
 	if (!tx->key)
 		return TX_CONTINUE;
 
-	switch (tx->key->conf.alg) {
-	case ALG_WEP:
+	switch (tx->key->conf.cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
 		return ieee80211_crypto_wep_encrypt(tx);
-	case ALG_TKIP:
+	case WLAN_CIPHER_SUITE_TKIP:
 		return ieee80211_crypto_tkip_encrypt(tx);
-	case ALG_CCMP:
+	case WLAN_CIPHER_SUITE_CCMP:
 		return ieee80211_crypto_ccmp_encrypt(tx);
-	case ALG_AES_CMAC:
+	case WLAN_CIPHER_SUITE_AES_CMAC:
 		return ieee80211_crypto_aes_cmac_encrypt(tx);
+	default:
+		/* handle hw-only algorithm */
+		if (info->control.hw_key) {
+			ieee80211_tx_set_protected(tx);
+			return TX_CONTINUE;
+		}
+		break;
+
 	}
 
-	/* not reached */
-	WARN_ON(1);
 	return TX_DROP;
 }
 
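
Annotation: switching from the internal ALG_* enum to WLAN_CIPHER_SUITE_* selectors distinguishes WEP40 from WEP104 and allows a default: branch for hardware-only ciphers, where ieee80211_tx_set_protected() suffices as long as a hw_key was programmed. The selectors are 32-bit OUI-plus-type values; a sketch of their encoding (the _SKETCH names are illustrative, the numeric results match the standard suites):

/* Sketch: cipher suite selectors are 32-bit OUI+type values. */
#define SUITE(oui, id)	(((oui) << 8) | (id))

enum {
	CIPHER_WEP40_SKETCH  = SUITE(0x000FAC, 1),	/* 0x000FAC01 */
	CIPHER_TKIP_SKETCH   = SUITE(0x000FAC, 2),	/* 0x000FAC02 */
	CIPHER_CCMP_SKETCH   = SUITE(0x000FAC, 4),	/* 0x000FAC04 */
	CIPHER_WEP104_SKETCH = SUITE(0x000FAC, 5),	/* 0x000FAC05 */
};
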
@@ -1339,6 +1364,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 	CALL_TXH(ieee80211_tx_h_dynamic_ps);
 	CALL_TXH(ieee80211_tx_h_check_assoc);
 	CALL_TXH(ieee80211_tx_h_ps_buf);
+	CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
 	CALL_TXH(ieee80211_tx_h_select_key);
 	if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
 		CALL_TXH(ieee80211_tx_h_rate_ctrl);
@@ -1511,8 +1537,8 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
 		I802_DEBUG_INC(local->tx_expand_skb_head);
 
 	if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) {
-		printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n",
-		       wiphy_name(local->hw.wiphy));
+		wiphy_debug(local->hw.wiphy,
+			    "failed to reallocate TX buffer\n");
 		return -ENOMEM;
 	}
 
@@ -1586,6 +1612,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
+	hdr = (struct ieee80211_hdr *) skb->data;
 	info->control.vif = &sdata->vif;
 
 	if (ieee80211_vif_is_mesh(&sdata->vif) &&
@@ -1699,7 +1726,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	u16 ethertype, hdrlen, meshhdrlen = 0;
 	__le16 fc;
 	struct ieee80211_hdr hdr;
-	struct ieee80211s_hdr mesh_hdr;
+	struct ieee80211s_hdr mesh_hdr __maybe_unused;
 	const u8 *encaps_data;
 	int encaps_len, skip_header_bytes;
 	int nh_pos, h_pos;
@@ -1816,7 +1843,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 #endif
 	case NL80211_IFTYPE_STATION:
 		memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
-		if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) {
+		if (sdata->u.mgd.use_4addr &&
+		    cpu_to_be16(ethertype) != sdata->control_port_protocol) {
 			fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
 			/* RA TA DA SA */
 			memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
@@ -1869,7 +1897,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	if (!ieee80211_vif_is_mesh(&sdata->vif) &&
 	    unlikely(!is_multicast_ether_addr(hdr.addr1) &&
 		     !(sta_flags & WLAN_STA_AUTHORIZED) &&
-		     !(ethertype == ETH_P_PAE &&
+		     !(cpu_to_be16(ethertype) == sdata->control_port_protocol &&
 		       compare_ether_addr(sdata->vif.addr,
 					  skb->data + ETH_ALEN) == 0))) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -2068,8 +2096,7 @@ void ieee80211_tx_pending(unsigned long data)
 
 		if (skb_queue_empty(&local->pending[i]))
 			list_for_each_entry_rcu(sdata, &local->interfaces, list)
-				netif_tx_wake_queue(
-					netdev_get_tx_queue(sdata->dev, i));
+				netif_wake_subqueue(sdata->dev, i);
 	}
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
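
Annotation: netif_wake_subqueue(dev, i) and netif_stop_subqueue(dev, i) are the multiqueue-aware helpers used here and in util.c below; they are roughly equivalent to operating on the per-queue object directly, as in this sketch:

/* Sketch: the subqueue helpers are thin wrappers over per-queue ops. */
static inline void wake_subqueue_sketch(struct net_device *dev, u16 q)
{
	netif_tx_wake_queue(netdev_get_tx_queue(dev, q));
}
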
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 748387d45bc0..0b6fc92bc0d7 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -283,8 +283,11 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
 
 	if (skb_queue_empty(&local->pending[queue])) {
 		rcu_read_lock();
-		list_for_each_entry_rcu(sdata, &local->interfaces, list)
-			netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue));
+		list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+			if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
+				continue;
+			netif_wake_subqueue(sdata->dev, queue);
+		}
 		rcu_read_unlock();
 	} else
 		tasklet_schedule(&local->tx_pending_tasklet);
@@ -323,7 +326,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list)
-		netif_tx_stop_queue(netdev_get_tx_queue(sdata->dev, queue));
+		netif_stop_subqueue(sdata->dev, queue);
 	rcu_read_unlock();
 }
 
@@ -471,16 +474,10 @@ void ieee80211_iterate_active_interfaces(
 
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		switch (sdata->vif.type) {
-		case __NL80211_IFTYPE_AFTER_LAST:
-		case NL80211_IFTYPE_UNSPECIFIED:
 		case NL80211_IFTYPE_MONITOR:
 		case NL80211_IFTYPE_AP_VLAN:
 			continue;
-		case NL80211_IFTYPE_AP:
-		case NL80211_IFTYPE_STATION:
-		case NL80211_IFTYPE_ADHOC:
-		case NL80211_IFTYPE_WDS:
-		case NL80211_IFTYPE_MESH_POINT:
+		default:
 			break;
 		}
 		if (ieee80211_sdata_running(sdata))
@@ -505,16 +502,10 @@ void ieee80211_iterate_active_interfaces_atomic(
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		switch (sdata->vif.type) {
-		case __NL80211_IFTYPE_AFTER_LAST:
-		case NL80211_IFTYPE_UNSPECIFIED:
 		case NL80211_IFTYPE_MONITOR:
 		case NL80211_IFTYPE_AP_VLAN:
 			continue;
-		case NL80211_IFTYPE_AP:
-		case NL80211_IFTYPE_STATION:
-		case NL80211_IFTYPE_ADHOC:
-		case NL80211_IFTYPE_WDS:
-		case NL80211_IFTYPE_MESH_POINT:
+		default:
 			break;
 		}
 		if (ieee80211_sdata_running(sdata))
@@ -904,26 +895,34 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 
 int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 			     const u8 *ie, size_t ie_len,
-			     enum ieee80211_band band)
+			     enum ieee80211_band band, u32 rate_mask,
+			     u8 channel)
 {
 	struct ieee80211_supported_band *sband;
 	u8 *pos;
 	size_t offset = 0, noffset;
 	int supp_rates_len, i;
+	u8 rates[32];
+	int num_rates;
+	int ext_rates_len;
 
 	sband = local->hw.wiphy->bands[band];
 
 	pos = buffer;
 
-	supp_rates_len = min_t(int, sband->n_bitrates, 8);
+	num_rates = 0;
+	for (i = 0; i < sband->n_bitrates; i++) {
+		if ((BIT(i) & rate_mask) == 0)
+			continue; /* skip rate */
+		rates[num_rates++] = (u8) (sband->bitrates[i].bitrate / 5);
+	}
+
+	supp_rates_len = min_t(int, num_rates, 8);
 
 	*pos++ = WLAN_EID_SUPP_RATES;
 	*pos++ = supp_rates_len;
-
-	for (i = 0; i < supp_rates_len; i++) {
-		int rate = sband->bitrates[i].bitrate;
-		*pos++ = (u8) (rate / 5);
-	}
+	memcpy(pos, rates, supp_rates_len);
+	pos += supp_rates_len;
 
 	/* insert "request information" if in custom IEs */
 	if (ie && ie_len) {
@@ -941,14 +940,18 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 		offset = noffset;
 	}
 
-	if (sband->n_bitrates > i) {
+	ext_rates_len = num_rates - supp_rates_len;
+	if (ext_rates_len > 0) {
 		*pos++ = WLAN_EID_EXT_SUPP_RATES;
-		*pos++ = sband->n_bitrates - i;
+		*pos++ = ext_rates_len;
+		memcpy(pos, rates + supp_rates_len, ext_rates_len);
+		pos += ext_rates_len;
+	}
 
-		for (; i < sband->n_bitrates; i++) {
-			int rate = sband->bitrates[i].bitrate;
-			*pos++ = (u8) (rate / 5);
-		}
+	if (channel && sband->band == IEEE80211_BAND_2GHZ) {
+		*pos++ = WLAN_EID_DS_PARAMS;
+		*pos++ = 1;
+		*pos++ = channel;
 	}
 
 	/* insert custom IEs that go before HT */
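
Annotation: ieee80211_build_preq_ies() now filters advertised bitrates through a caller-provided mask ((u32) -1 in the hw-scan path means all rates) and splits the result between the Supported Rates IE (at most 8 entries) and an Extended Supported Rates IE, each entry in 500 kbit/s units (bitrate/5). A sketch of the split, assuming rates[] has already been filtered:

/* Sketch: first 8 rates go into WLAN_EID_SUPP_RATES, the rest into
 * WLAN_EID_EXT_SUPP_RATES; each entry is the rate in 500 kbit/s units. */
static u8 *emit_rate_ies_sketch(u8 *pos, const u8 *rates, int num_rates)
{
	int supp = min(num_rates, 8);

	*pos++ = WLAN_EID_SUPP_RATES;
	*pos++ = supp;
	memcpy(pos, rates, supp);
	pos += supp;

	if (num_rates > supp) {
		*pos++ = WLAN_EID_EXT_SUPP_RATES;
		*pos++ = num_rates - supp;
		memcpy(pos, rates + supp, num_rates - supp);
		pos += num_rates - supp;
	}
	return pos;
}
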
@@ -1017,6 +1020,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	struct ieee80211_mgmt *mgmt;
 	size_t buf_len;
 	u8 *buf;
+	u8 chan;
 
 	/* FIXME: come up with a proper value */
 	buf = kmalloc(200 + ie_len, GFP_KERNEL);
@@ -1026,8 +1030,14 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 		return;
 	}
 
+	chan = ieee80211_frequency_to_channel(
+		local->hw.conf.channel->center_freq);
+
 	buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len,
-					   local->hw.conf.channel->band);
+					   local->hw.conf.channel->band,
+					   sdata->rc_rateidx_mask
+						[local->hw.conf.channel->band],
+					   chan);
 
 	skb = ieee80211_probereq_get(&local->hw, &sdata->vif,
 				     ssid, ssid_len,
@@ -1189,7 +1199,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 			/* ignore virtual */
 			break;
 		case NL80211_IFTYPE_UNSPECIFIED:
-		case __NL80211_IFTYPE_AFTER_LAST:
+		case NUM_NL80211_IFTYPES:
+		case NL80211_IFTYPE_P2P_CLIENT:
+		case NL80211_IFTYPE_P2P_GO:
 			WARN_ON(1);
 			break;
 		}
@@ -1209,7 +1221,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	mutex_lock(&local->sta_mtx);
 
 	list_for_each_entry(sta, &local->sta_list, list) {
-		ieee80211_sta_tear_down_BA_sessions(sta);
+		ieee80211_sta_tear_down_BA_sessions(sta, true);
 		clear_sta_flags(sta, WLAN_STA_BLOCK_BA);
 	}
 
@@ -1285,17 +1297,13 @@ static int check_mgd_smps(struct ieee80211_if_managed *ifmgd,
1285} 1297}
1286 1298
1287/* must hold iflist_mtx */ 1299/* must hold iflist_mtx */
1288void ieee80211_recalc_smps(struct ieee80211_local *local, 1300void ieee80211_recalc_smps(struct ieee80211_local *local)
1289 struct ieee80211_sub_if_data *forsdata)
1290{ 1301{
1291 struct ieee80211_sub_if_data *sdata; 1302 struct ieee80211_sub_if_data *sdata;
1292 enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF; 1303 enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF;
1293 int count = 0; 1304 int count = 0;
1294 1305
1295 if (forsdata) 1306 lockdep_assert_held(&local->iflist_mtx);
1296 WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx));
1297
1298 WARN_ON(!mutex_is_locked(&local->iflist_mtx));
1299 1307
1300 /* 1308 /*
1301 * This function could be improved to handle multiple 1309 * This function could be improved to handle multiple
@@ -1308,22 +1316,12 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
1308 */ 1316 */
1309 1317
1310 list_for_each_entry(sdata, &local->interfaces, list) { 1318 list_for_each_entry(sdata, &local->interfaces, list) {
1311 if (!netif_running(sdata->dev)) 1319 if (!ieee80211_sdata_running(sdata))
1312 continue; 1320 continue;
1313 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1321 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1314 goto set; 1322 goto set;
1315 if (sdata != forsdata) { 1323
1316 /* 1324 count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
1317 * This nested is ok -- we are holding the iflist_mtx
1318 * so can't get here twice or so. But it's required
1319 * since normally we acquire it first and then the
1320 * iflist_mtx.
1321 */
1322 mutex_lock_nested(&sdata->u.mgd.mtx, SINGLE_DEPTH_NESTING);
1323 count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
1324 mutex_unlock(&sdata->u.mgd.mtx);
1325 } else
1326 count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
1327 1325
1328 if (count > 1) { 1326 if (count > 1) {
1329 smps_mode = IEEE80211_SMPS_OFF; 1327 smps_mode = IEEE80211_SMPS_OFF;
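
The recalc-SMPS hunks replace runtime WARN_ON(!mutex_is_locked(...)) checks with lockdep_assert_held(), which verifies that the current task owns the mutex and compiles away when lockdep is disabled. A rough userspace analogue (build with -pthread) using the EDEADLK behaviour of POSIX error-checking mutexes; this is strictly a debugging sketch:

    #include <assert.h>
    #include <errno.h>
    #include <pthread.h>

    static pthread_mutex_t iflist_mtx;

    /* Stand-in for lockdep_assert_held(): relocking an ERRORCHECK
     * mutex from the owning thread fails with EDEADLK, which is the
     * ownership we assert.  (If the lock were free this call would
     * acquire it instead, so don't do this in production code.) */
    static void assert_held(pthread_mutex_t *m)
    {
        assert(pthread_mutex_lock(m) == EDEADLK);
    }

    static void recalc_smps(void)
    {
        assert_held(&iflist_mtx);   /* caller must hold iflist_mtx */
        /* ... walk the interface list safely ... */
    }

    int main(void)
    {
        pthread_mutexattr_t a;

        pthread_mutexattr_init(&a);
        pthread_mutexattr_settype(&a, PTHREAD_MUTEX_ERRORCHECK);
        pthread_mutex_init(&iflist_mtx, &a);

        pthread_mutex_lock(&iflist_mtx);
        recalc_smps();              /* assertion passes */
        pthread_mutex_unlock(&iflist_mtx);
        return 0;
    }
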
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 9ebc8d8a1f5b..2ff6d1e3ed21 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -222,7 +222,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
222 struct ieee80211_key *key) 222 struct ieee80211_key *key)
223{ 223{
224 u32 klen; 224 u32 klen;
225 u8 *rc4key; 225 u8 rc4key[3 + WLAN_KEY_LEN_WEP104];
226 u8 keyidx; 226 u8 keyidx;
227 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 227 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
228 unsigned int hdrlen; 228 unsigned int hdrlen;
@@ -240,15 +240,11 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
240 240
241 keyidx = skb->data[hdrlen + 3] >> 6; 241 keyidx = skb->data[hdrlen + 3] >> 6;
242 242
243 if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) 243 if (!key || keyidx != key->conf.keyidx)
244 return -1; 244 return -1;
245 245
246 klen = 3 + key->conf.keylen; 246 klen = 3 + key->conf.keylen;
247 247
248 rc4key = kmalloc(klen, GFP_ATOMIC);
249 if (!rc4key)
250 return -1;
251
252 /* Prepend 24-bit IV to RC4 key */ 248 /* Prepend 24-bit IV to RC4 key */
253 memcpy(rc4key, skb->data + hdrlen, 3); 249 memcpy(rc4key, skb->data + hdrlen, 3);
254 250
@@ -260,8 +256,6 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
260 len)) 256 len))
261 ret = -1; 257 ret = -1;
262 258
263 kfree(rc4key);
264
265 /* Trim ICV */ 259 /* Trim ICV */
266 skb_trim(skb, skb->len - WEP_ICV_LEN); 260 skb_trim(skb, skb->len - WEP_ICV_LEN);
267 261
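
The wep.c change works because the RC4 seed has a hard upper bound: a 3-byte IV plus at most WLAN_KEY_LEN_WEP104 (13) key bytes, so a 16-byte stack buffer replaces the per-packet kmalloc()/kfree() pair. A standalone sketch of the seed construction:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define WLAN_KEY_LEN_WEP104 13      /* longest WEP key */

    /* Build the RC4 seed for WEP: 24-bit IV prepended to the key.
     * The worst case is bounded, so no heap allocation is needed. */
    static size_t wep_rc4_seed(uint8_t seed[3 + WLAN_KEY_LEN_WEP104],
                               const uint8_t iv[3],
                               const uint8_t *key, size_t keylen)
    {
        if (keylen > WLAN_KEY_LEN_WEP104)
            return 0;                   /* reject oversized keys */
        memcpy(seed, iv, 3);            /* IV first ... */
        memcpy(seed + 3, key, keylen);  /* ... then the shared key */
        return 3 + keylen;
    }

    int main(void)
    {
        uint8_t seed[3 + WLAN_KEY_LEN_WEP104];
        const uint8_t iv[3]  = { 0x01, 0x02, 0x03 };
        const uint8_t key[5] = { 'a', 'b', 'c', 'd', 'e' }; /* WEP-40 */
        size_t n = wep_rc4_seed(seed, iv, key, sizeof(key));

        for (size_t i = 0; i < n; i++)
            printf("%02x", seed[i]);
        printf("\n");
        return 0;
    }
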
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 81d4ad64184a..ae344d1ba056 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -43,7 +43,7 @@ enum work_action {
43/* utils */ 43/* utils */
44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) 44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local)
45{ 45{
46 WARN_ON(!mutex_is_locked(&local->work_mtx)); 46 lockdep_assert_held(&local->mtx);
47} 47}
48 48
49/* 49/*
@@ -757,7 +757,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
757 mgmt = (struct ieee80211_mgmt *) skb->data; 757 mgmt = (struct ieee80211_mgmt *) skb->data;
758 fc = le16_to_cpu(mgmt->frame_control); 758 fc = le16_to_cpu(mgmt->frame_control);
759 759
760 mutex_lock(&local->work_mtx); 760 mutex_lock(&local->mtx);
761 761
762 list_for_each_entry(wk, &local->work_list, list) { 762 list_for_each_entry(wk, &local->work_list, list) {
763 const u8 *bssid = NULL; 763 const u8 *bssid = NULL;
@@ -833,7 +833,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
833 WARN(1, "unexpected: %d", rma); 833 WARN(1, "unexpected: %d", rma);
834 } 834 }
835 835
836 mutex_unlock(&local->work_mtx); 836 mutex_unlock(&local->mtx);
837 837
838 if (rma != WORK_ACT_DONE) 838 if (rma != WORK_ACT_DONE)
839 goto out; 839 goto out;
@@ -845,9 +845,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
845 case WORK_DONE_REQUEUE: 845 case WORK_DONE_REQUEUE:
846 synchronize_rcu(); 846 synchronize_rcu();
847 wk->started = false; /* restart */ 847 wk->started = false; /* restart */
848 mutex_lock(&local->work_mtx); 848 mutex_lock(&local->mtx);
849 list_add_tail(&wk->list, &local->work_list); 849 list_add_tail(&wk->list, &local->work_list);
850 mutex_unlock(&local->work_mtx); 850 mutex_unlock(&local->mtx);
851 } 851 }
852 852
853 out: 853 out:
@@ -888,9 +888,9 @@ static void ieee80211_work_work(struct work_struct *work)
888 while ((skb = skb_dequeue(&local->work_skb_queue))) 888 while ((skb = skb_dequeue(&local->work_skb_queue)))
889 ieee80211_work_rx_queued_mgmt(local, skb); 889 ieee80211_work_rx_queued_mgmt(local, skb);
890 890
891 ieee80211_recalc_idle(local); 891 mutex_lock(&local->mtx);
892 892
893 mutex_lock(&local->work_mtx); 893 ieee80211_recalc_idle(local);
894 894
895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
896 bool started = wk->started; 896 bool started = wk->started;
@@ -995,20 +995,16 @@ static void ieee80211_work_work(struct work_struct *work)
995 run_again(local, jiffies + HZ/2); 995 run_again(local, jiffies + HZ/2);
996 } 996 }
997 997
998 mutex_lock(&local->scan_mtx);
999
1000 if (list_empty(&local->work_list) && local->scan_req && 998 if (list_empty(&local->work_list) && local->scan_req &&
1001 !local->scanning) 999 !local->scanning)
1002 ieee80211_queue_delayed_work(&local->hw, 1000 ieee80211_queue_delayed_work(&local->hw,
1003 &local->scan_work, 1001 &local->scan_work,
1004 round_jiffies_relative(0)); 1002 round_jiffies_relative(0));
1005 1003
1006 mutex_unlock(&local->scan_mtx);
1007
1008 mutex_unlock(&local->work_mtx);
1009
1010 ieee80211_recalc_idle(local); 1004 ieee80211_recalc_idle(local);
1011 1005
1006 mutex_unlock(&local->mtx);
1007
1012 list_for_each_entry_safe(wk, tmp, &free_work, list) { 1008 list_for_each_entry_safe(wk, tmp, &free_work, list) {
1013 wk->done(wk, NULL); 1009 wk->done(wk, NULL);
1014 list_del(&wk->list); 1010 list_del(&wk->list);
@@ -1035,16 +1031,15 @@ void ieee80211_add_work(struct ieee80211_work *wk)
1035 wk->started = false; 1031 wk->started = false;
1036 1032
1037 local = wk->sdata->local; 1033 local = wk->sdata->local;
1038 mutex_lock(&local->work_mtx); 1034 mutex_lock(&local->mtx);
1039 list_add_tail(&wk->list, &local->work_list); 1035 list_add_tail(&wk->list, &local->work_list);
1040 mutex_unlock(&local->work_mtx); 1036 mutex_unlock(&local->mtx);
1041 1037
1042 ieee80211_queue_work(&local->hw, &local->work_work); 1038 ieee80211_queue_work(&local->hw, &local->work_work);
1043} 1039}
1044 1040
1045void ieee80211_work_init(struct ieee80211_local *local) 1041void ieee80211_work_init(struct ieee80211_local *local)
1046{ 1042{
1047 mutex_init(&local->work_mtx);
1048 INIT_LIST_HEAD(&local->work_list); 1043 INIT_LIST_HEAD(&local->work_list);
1049 setup_timer(&local->work_timer, ieee80211_work_timer, 1044 setup_timer(&local->work_timer, ieee80211_work_timer,
1050 (unsigned long)local); 1045 (unsigned long)local);
@@ -1057,7 +1052,7 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1057 struct ieee80211_local *local = sdata->local; 1052 struct ieee80211_local *local = sdata->local;
1058 struct ieee80211_work *wk; 1053 struct ieee80211_work *wk;
1059 1054
1060 mutex_lock(&local->work_mtx); 1055 mutex_lock(&local->mtx);
1061 list_for_each_entry(wk, &local->work_list, list) { 1056 list_for_each_entry(wk, &local->work_list, list) {
1062 if (wk->sdata != sdata) 1057 if (wk->sdata != sdata)
1063 continue; 1058 continue;
@@ -1065,19 +1060,19 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1065 wk->started = true; 1060 wk->started = true;
1066 wk->timeout = jiffies; 1061 wk->timeout = jiffies;
1067 } 1062 }
1068 mutex_unlock(&local->work_mtx); 1063 mutex_unlock(&local->mtx);
1069 1064
1070 /* run cleanups etc. */ 1065 /* run cleanups etc. */
1071 ieee80211_work_work(&local->work_work); 1066 ieee80211_work_work(&local->work_work);
1072 1067
1073 mutex_lock(&local->work_mtx); 1068 mutex_lock(&local->mtx);
1074 list_for_each_entry(wk, &local->work_list, list) { 1069 list_for_each_entry(wk, &local->work_list, list) {
1075 if (wk->sdata != sdata) 1070 if (wk->sdata != sdata)
1076 continue; 1071 continue;
1077 WARN_ON(1); 1072 WARN_ON(1);
1078 break; 1073 break;
1079 } 1074 }
1080 mutex_unlock(&local->work_mtx); 1075 mutex_unlock(&local->mtx);
1081} 1076}
1082 1077
1083ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, 1078ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -1163,7 +1158,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1163 struct ieee80211_work *wk, *tmp; 1158 struct ieee80211_work *wk, *tmp;
1164 bool found = false; 1159 bool found = false;
1165 1160
1166 mutex_lock(&local->work_mtx); 1161 mutex_lock(&local->mtx);
1167 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 1162 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
1168 if ((unsigned long) wk == cookie) { 1163 if ((unsigned long) wk == cookie) {
1169 wk->timeout = jiffies; 1164 wk->timeout = jiffies;
@@ -1171,7 +1166,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1171 break; 1166 break;
1172 } 1167 }
1173 } 1168 }
1174 mutex_unlock(&local->work_mtx); 1169 mutex_unlock(&local->mtx);
1175 1170
1176 if (!found) 1171 if (!found)
1177 return -ENOENT; 1172 return -ENOENT;
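
All of work.c is converted from the dedicated work_mtx (plus a nested scan_mtx acquisition) to the single local->mtx, so "check the work list, then maybe kick the scan" becomes one critical section with no lock-ordering hazard. A toy illustration of the consolidation, with simplified stand-in types:

    #include <pthread.h>
    #include <stdbool.h>

    /* Before: work_mtx and scan_mtx imposed a nesting order.  After:
     * one mutex guards both the work list and the scan state, so the
     * "work list empty -> queue the scan" decision is atomic. */
    struct local {
        pthread_mutex_t mtx;
        int work_pending;           /* items on the work list */
        bool scan_requested;
        bool scanning;
    };

    static void work_done(struct local *l)
    {
        pthread_mutex_lock(&l->mtx);
        l->work_pending--;
        if (l->work_pending == 0 && l->scan_requested && !l->scanning)
            l->scanning = true;     /* kick the scan under the same lock */
        pthread_mutex_unlock(&l->mtx);
    }

    int main(void)
    {
        struct local l = { PTHREAD_MUTEX_INITIALIZER, 1, true, false };

        work_done(&l);
        return l.scanning ? 0 : 1;  /* exits 0: scan was queued */
    }
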
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 8d59d27d887e..bee230d8fd11 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
36 int tail; 36 int tail;
37 37
38 hdr = (struct ieee80211_hdr *)skb->data; 38 hdr = (struct ieee80211_hdr *)skb->data;
39 if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || 39 if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
40 !ieee80211_is_data_present(hdr->frame_control)) 40 skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control))
41 return TX_CONTINUE; 41 return TX_CONTINUE;
42 42
43 hdrlen = ieee80211_hdrlen(hdr->frame_control); 43 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
94 if (status->flag & RX_FLAG_MMIC_STRIPPED) 94 if (status->flag & RX_FLAG_MMIC_STRIPPED)
95 return RX_CONTINUE; 95 return RX_CONTINUE;
96 96
97 if (!rx->key || rx->key->conf.alg != ALG_TKIP || 97 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
98 !ieee80211_has_protected(hdr->frame_control) || 98 !ieee80211_has_protected(hdr->frame_control) ||
99 !ieee80211_is_data_present(hdr->frame_control)) 99 !ieee80211_is_data_present(hdr->frame_control))
100 return RX_CONTINUE; 100 return RX_CONTINUE;
@@ -117,7 +117,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
117 key = &rx->key->conf.key[key_offset]; 117 key = &rx->key->conf.key[key_offset];
118 michael_mic(key, hdr, data, data_len, mic); 118 michael_mic(key, hdr, data, data_len, mic);
119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { 119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) {
120 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 120 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
121 return RX_DROP_UNUSABLE; 121 return RX_DROP_UNUSABLE;
122 122
123 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, 123 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
@@ -221,19 +221,13 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
221 if (!rx->sta || skb->len - hdrlen < 12) 221 if (!rx->sta || skb->len - hdrlen < 12)
222 return RX_DROP_UNUSABLE; 222 return RX_DROP_UNUSABLE;
223 223
224 if (status->flag & RX_FLAG_DECRYPTED) { 224 /*
225 if (status->flag & RX_FLAG_IV_STRIPPED) { 225 * Let TKIP code verify IV, but skip decryption.
226 /* 226 * In the case where hardware checks the IV as well,
227 * Hardware took care of all processing, including 227 * we don't even get here, see ieee80211_rx_h_decrypt()
228 * replay protection, and stripped the ICV/IV so 228 */
229 * we cannot do any checks here. 229 if (status->flag & RX_FLAG_DECRYPTED)
230 */
231 return RX_CONTINUE;
232 }
233
234 /* let TKIP code verify IV, but skip decryption */
235 hwaccel = 1; 230 hwaccel = 1;
236 }
237 231
238 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, 232 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
239 key, skb->data + hdrlen, 233 key, skb->data + hdrlen,
@@ -447,10 +441,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
447 if (!rx->sta || data_len < 0) 441 if (!rx->sta || data_len < 0)
448 return RX_DROP_UNUSABLE; 442 return RX_DROP_UNUSABLE;
449 443
450 if ((status->flag & RX_FLAG_DECRYPTED) &&
451 (status->flag & RX_FLAG_IV_STRIPPED))
452 return RX_CONTINUE;
453
454 ccmp_hdr2pn(pn, skb->data + hdrlen); 444 ccmp_hdr2pn(pn, skb->data + hdrlen);
455 445
456 queue = ieee80211_is_mgmt(hdr->frame_control) ? 446 queue = ieee80211_is_mgmt(hdr->frame_control) ?
@@ -564,10 +554,6 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
564 if (!ieee80211_is_mgmt(hdr->frame_control)) 554 if (!ieee80211_is_mgmt(hdr->frame_control))
565 return RX_CONTINUE; 555 return RX_CONTINUE;
566 556
567 if ((status->flag & RX_FLAG_DECRYPTED) &&
568 (status->flag & RX_FLAG_IV_STRIPPED))
569 return RX_CONTINUE;
570
571 if (skb->len < 24 + sizeof(*mmie)) 557 if (skb->len < 24 + sizeof(*mmie))
572 return RX_DROP_UNUSABLE; 558 return RX_DROP_UNUSABLE;
573 559
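
The wpa.c hunks drop the driver-local ALG_* enum in favour of the 802.11 cipher suite selectors: a 3-byte OUI (00-0F-AC) plus a type byte, so WLAN_CIPHER_SUITE_TKIP is the 32-bit value 0x000fac02. A small sketch of how those selectors are composed:

    #include <stdint.h>
    #include <stdio.h>

    /* 802.11 cipher suite selector: 3-byte OUI followed by a type
     * byte, packed into a host u32 as 0xOOOOOOTT. */
    #define SUITE(oui, id) (((uint32_t)(oui) << 8) | (uint8_t)(id))

    #define WLAN_CIPHER_SUITE_WEP40  SUITE(0x000FAC, 1)
    #define WLAN_CIPHER_SUITE_TKIP   SUITE(0x000FAC, 2)
    #define WLAN_CIPHER_SUITE_CCMP   SUITE(0x000FAC, 4)
    #define WLAN_CIPHER_SUITE_WEP104 SUITE(0x000FAC, 5)

    int main(void)
    {
        /* The checks in the patch become plain u32 comparisons: */
        uint32_t cipher = WLAN_CIPHER_SUITE_TKIP;

        printf("TKIP selector: 0x%08x\n", (unsigned)cipher); /* 0x000fac02 */
        return cipher == WLAN_CIPHER_SUITE_TKIP ? 0 : 1;
    }
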
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 43288259f4a1..1534f2b44caf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -525,6 +525,7 @@ config NETFILTER_XT_TARGET_TPROXY
525 depends on NETFILTER_XTABLES 525 depends on NETFILTER_XTABLES
526 depends on NETFILTER_ADVANCED 526 depends on NETFILTER_ADVANCED
527 select NF_DEFRAG_IPV4 527 select NF_DEFRAG_IPV4
528 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
528 help 529 help
529 This option adds a `TPROXY' target, which is somewhat similar to 530 This option adds a `TPROXY' target, which is somewhat similar to
530 REDIRECT. It can only be used in the mangle table and is useful 531 REDIRECT. It can only be used in the mangle table and is useful
@@ -927,6 +928,7 @@ config NETFILTER_XT_MATCH_SOCKET
927 depends on NETFILTER_ADVANCED 928 depends on NETFILTER_ADVANCED
928 depends on !NF_CONNTRACK || NF_CONNTRACK 929 depends on !NF_CONNTRACK || NF_CONNTRACK
929 select NF_DEFRAG_IPV4 930 select NF_DEFRAG_IPV4
931 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
930 help 932 help
931 This option adds a `socket' match, which can be used to match 933 This option adds a `socket' match, which can be used to match
932 packets for which a TCP or UDP socket lookup finds a valid socket. 934 packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index fdaec7daff1d..85dabb86be6f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -105,10 +105,8 @@ EXPORT_SYMBOL(nf_register_hooks);
105 105
106void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) 106void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
107{ 107{
108 unsigned int i; 108 while (n-- > 0)
109 109 nf_unregister_hook(&reg[n]);
110 for (i = 0; i < n; i++)
111 nf_unregister_hook(&reg[i]);
112} 110}
113EXPORT_SYMBOL(nf_unregister_hooks); 111EXPORT_SYMBOL(nf_unregister_hooks);
114 112
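
nf_unregister_hooks() now tears hooks down in reverse registration order; the while (n-- > 0) idiom visits reg[n-1] down to reg[0], so teardown mirrors setup. A trivial demonstration:

    #include <stdio.h>

    static void unregister_hook(int id)
    {
        printf("unregister hook %d\n", id);
    }

    /* Reverse-order bulk unregister: undo registration LIFO-style. */
    static void unregister_hooks(const int *reg, unsigned int n)
    {
        while (n-- > 0)
            unregister_hook(reg[n]);
    }

    int main(void)
    {
        int hooks[] = { 1, 2, 3 };

        unregister_hooks(hooks, 3);   /* prints 3, 2, 1 */
        return 0;
    }
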
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 46a77d5c3887..a22dac227055 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4menuconfig IP_VS 4menuconfig IP_VS
5 tristate "IP virtual server support" 5 tristate "IP virtual server support"
6 depends on NET && INET && NETFILTER && NF_CONNTRACK 6 depends on NET && INET && NETFILTER
7 ---help--- 7 ---help---
8 IP Virtual Server support will let you build a high-performance 8 IP Virtual Server support will let you build a high-performance
9 virtual server based on cluster of two or more real servers. This 9 virtual server based on cluster of two or more real servers. This
@@ -235,7 +235,8 @@ comment 'IPVS application helper'
235 235
236config IP_VS_FTP 236config IP_VS_FTP
237 tristate "FTP protocol helper" 237 tristate "FTP protocol helper"
238 depends on IP_VS_PROTO_TCP && NF_NAT 238 depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
239 select IP_VS_NFCT
239 ---help--- 240 ---help---
240 FTP is a protocol that transfers IP address and/or port number in 241 FTP is a protocol that transfers IP address and/or port number in
241 the payload. In the virtual server via Network Address Translation, 242 the payload. In the virtual server via Network Address Translation,
@@ -247,4 +248,19 @@ config IP_VS_FTP
247 If you want to compile it in kernel, say Y. To compile it as a 248 If you want to compile it in kernel, say Y. To compile it as a
248 module, choose M here. If unsure, say N. 249 module, choose M here. If unsure, say N.
249 250
251config IP_VS_NFCT
252 bool "Netfilter connection tracking"
253 depends on NF_CONNTRACK
254 ---help---
255 The Netfilter connection tracking support allows the IPVS
256 connection state to be exported to the Netfilter framework
257 for filtering purposes.
258
259config IP_VS_PE_SIP
260 tristate "SIP persistence engine"
261 depends on IP_VS_PROTO_UDP
262 depends on NF_CONNTRACK_SIP
263 ---help---
264 Allow persistence based on the SIP Call-ID
265
250endif # IP_VS 266endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index e3baefd7066e..34ee602ddb66 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o 9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o 10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
11 11
12ip_vs-extra_objs-y :=
13ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
14
12ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ 15ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
13 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ 16 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
14 ip_vs_est.o ip_vs_proto.o \ 17 ip_vs_est.o ip_vs_proto.o ip_vs_pe.o \
15 $(ip_vs_proto-objs-y) 18 $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
16 19
17 20
18# IPVS core 21# IPVS core
@@ -32,3 +35,6 @@ obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
32 35
33# IPVS application helpers 36# IPVS application helpers
34obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o 37obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
38
39# IPVS connection template retrievers
40obj-$(CONFIG_IP_VS_PE_SIP) += ip_vs_pe_sip.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index e76f87f4aca8..a475edee0912 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -103,8 +103,8 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
103 goto out; 103 goto out;
104 104
105 list_add(&inc->a_list, &app->incs_list); 105 list_add(&inc->a_list, &app->incs_list);
106 IP_VS_DBG(9, "%s application %s:%u registered\n", 106 IP_VS_DBG(9, "%s App %s:%u registered\n",
107 pp->name, inc->name, inc->port); 107 pp->name, inc->name, ntohs(inc->port));
108 108
109 return 0; 109 return 0;
110 110
@@ -130,7 +130,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
130 pp->unregister_app(inc); 130 pp->unregister_app(inc);
131 131
132 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 132 IP_VS_DBG(9, "%s App %s:%u unregistered\n",
133 pp->name, inc->name, inc->port); 133 pp->name, inc->name, ntohs(inc->port));
134 134
135 list_del(&inc->a_list); 135 list_del(&inc->a_list);
136 136
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b71c69a2db13..e9adecdc8ca4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -148,6 +148,42 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
148 & ip_vs_conn_tab_mask; 148 & ip_vs_conn_tab_mask;
149} 149}
150 150
151static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
152 bool inverse)
153{
154 const union nf_inet_addr *addr;
155 __be16 port;
156
157 if (p->pe_data && p->pe->hashkey_raw)
158 return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) &
159 ip_vs_conn_tab_mask;
160
161 if (likely(!inverse)) {
162 addr = p->caddr;
163 port = p->cport;
164 } else {
165 addr = p->vaddr;
166 port = p->vport;
167 }
168
169 return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
170}
171
172static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
173{
174 struct ip_vs_conn_param p;
175
176 ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
177 NULL, 0, &p);
178
179 if (cp->dest && cp->dest->svc->pe) {
180 p.pe = cp->dest->svc->pe;
181 p.pe_data = cp->pe_data;
182 p.pe_data_len = cp->pe_data_len;
183 }
184
185 return ip_vs_conn_hashkey_param(&p, false);
186}
151 187
152/* 188/*
153 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. 189 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
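
The lookups in this file are being refactored around struct ip_vs_conn_param: the hash bucket comes from the client tuple for forward traffic, from the virtual tuple for replies, or from a persistence engine's raw hash when pe_data is attached. A simplified IPv4-only sketch with a toy hash in place of the kernel's jhash (the names mirror the patch, the bodies are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TAB_BITS 8
    #define TAB_MASK ((1u << TAB_BITS) - 1)

    struct conn_param {
        uint32_t caddr, vaddr;      /* client / virtual address */
        uint16_t cport, vport;
        const uint8_t *pe_data;     /* persistence-engine payload */
        size_t pe_data_len;
    };

    static unsigned int hash3(uint32_t a, uint16_t p)
    {
        return (a ^ (a >> 16) ^ p) & TAB_MASK;  /* toy jhash stand-in */
    }

    /* Direction-aware key: forward traffic hashes the client tuple,
     * inverse (reply) traffic hashes the virtual tuple; a PE carrying
     * raw data (e.g. a SIP Call-ID) overrides both. */
    static unsigned int conn_hashkey_param(const struct conn_param *p,
                                           bool inverse)
    {
        if (p->pe_data) {
            unsigned int h = 0;
            for (size_t i = 0; i < p->pe_data_len; i++)
                h = h * 31 + p->pe_data[i];
            return h & TAB_MASK;
        }
        return inverse ? hash3(p->vaddr, p->vport)
                       : hash3(p->caddr, p->cport);
    }

    int main(void)
    {
        struct conn_param p = { 0x0a000001, 0x0a0000fe, 12345, 80,
                                NULL, 0 };

        printf("fwd bucket %u, rev bucket %u\n",
               conn_hashkey_param(&p, false),
               conn_hashkey_param(&p, true));
        return 0;
    }
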
@@ -162,7 +198,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
162 return 0; 198 return 0;
163 199
164 /* Hash by protocol, client address and port */ 200 /* Hash by protocol, client address and port */
165 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 201 hash = ip_vs_conn_hashkey_conn(cp);
166 202
167 ct_write_lock(hash); 203 ct_write_lock(hash);
168 spin_lock(&cp->lock); 204 spin_lock(&cp->lock);
@@ -195,7 +231,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
195 int ret; 231 int ret;
196 232
197 /* unhash it and decrease its reference counter */ 233 /* unhash it and decrease its reference counter */
198 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 234 hash = ip_vs_conn_hashkey_conn(cp);
199 235
200 ct_write_lock(hash); 236 ct_write_lock(hash);
201 spin_lock(&cp->lock); 237 spin_lock(&cp->lock);
@@ -218,27 +254,26 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
218/* 254/*
219 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 255 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
220 * Called for pkts coming from OUTside-to-INside. 256 * Called for pkts coming from OUTside-to-INside.
221 * s_addr, s_port: pkt source address (foreign host) 257 * p->caddr, p->cport: pkt source address (foreign host)
222 * d_addr, d_port: pkt dest address (load balancer) 258 * p->vaddr, p->vport: pkt dest address (load balancer)
223 */ 259 */
224static inline struct ip_vs_conn *__ip_vs_conn_in_get 260static inline struct ip_vs_conn *
225(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, 261__ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
226 const union nf_inet_addr *d_addr, __be16 d_port)
227{ 262{
228 unsigned hash; 263 unsigned hash;
229 struct ip_vs_conn *cp; 264 struct ip_vs_conn *cp;
230 265
231 hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); 266 hash = ip_vs_conn_hashkey_param(p, false);
232 267
233 ct_read_lock(hash); 268 ct_read_lock(hash);
234 269
235 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
236 if (cp->af == af && 271 if (cp->af == p->af &&
237 ip_vs_addr_equal(af, s_addr, &cp->caddr) && 272 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
238 ip_vs_addr_equal(af, d_addr, &cp->vaddr) && 273 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
239 s_port == cp->cport && d_port == cp->vport && 274 p->cport == cp->cport && p->vport == cp->vport &&
240 ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
241 protocol == cp->protocol) { 276 p->protocol == cp->protocol) {
242 /* HIT */ 277 /* HIT */
243 atomic_inc(&cp->refcnt); 278 atomic_inc(&cp->refcnt);
244 ct_read_unlock(hash); 279 ct_read_unlock(hash);
@@ -251,99 +286,111 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
251 return NULL; 286 return NULL;
252} 287}
253 288
254struct ip_vs_conn *ip_vs_conn_in_get 289struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
255(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
256 const union nf_inet_addr *d_addr, __be16 d_port)
257{ 290{
258 struct ip_vs_conn *cp; 291 struct ip_vs_conn *cp;
259 292
260 cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); 293 cp = __ip_vs_conn_in_get(p);
261 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) 294 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
262 cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, 295 struct ip_vs_conn_param cport_zero_p = *p;
263 d_port); 296 cport_zero_p.cport = 0;
297 cp = __ip_vs_conn_in_get(&cport_zero_p);
298 }
264 299
265 IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", 300 IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
266 ip_vs_proto_name(protocol), 301 ip_vs_proto_name(p->protocol),
267 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 302 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
268 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 303 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
269 cp ? "hit" : "not hit"); 304 cp ? "hit" : "not hit");
270 305
271 return cp; 306 return cp;
272} 307}
273 308
309static int
310ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
311 const struct ip_vs_iphdr *iph,
312 unsigned int proto_off, int inverse,
313 struct ip_vs_conn_param *p)
314{
315 __be16 _ports[2], *pptr;
316
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
318 if (pptr == NULL)
319 return 1;
320
321 if (likely(!inverse))
322 ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
323 &iph->daddr, pptr[1], p);
324 else
325 ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
326 &iph->saddr, pptr[0], p);
327 return 0;
328}
329
274struct ip_vs_conn * 330struct ip_vs_conn *
275ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
276 struct ip_vs_protocol *pp, 332 struct ip_vs_protocol *pp,
277 const struct ip_vs_iphdr *iph, 333 const struct ip_vs_iphdr *iph,
278 unsigned int proto_off, int inverse) 334 unsigned int proto_off, int inverse)
279{ 335{
280 __be16 _ports[2], *pptr; 336 struct ip_vs_conn_param p;
281 337
282 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 338 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
283 if (pptr == NULL)
284 return NULL; 339 return NULL;
285 340
286 if (likely(!inverse)) 341 return ip_vs_conn_in_get(&p);
287 return ip_vs_conn_in_get(af, iph->protocol,
288 &iph->saddr, pptr[0],
289 &iph->daddr, pptr[1]);
290 else
291 return ip_vs_conn_in_get(af, iph->protocol,
292 &iph->daddr, pptr[1],
293 &iph->saddr, pptr[0]);
294} 342}
295EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); 343EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
296 344
297/* Get reference to connection template */ 345/* Get reference to connection template */
298struct ip_vs_conn *ip_vs_ct_in_get 346struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
299(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
300 const union nf_inet_addr *d_addr, __be16 d_port)
301{ 347{
302 unsigned hash; 348 unsigned hash;
303 struct ip_vs_conn *cp; 349 struct ip_vs_conn *cp;
304 350
305 hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); 351 hash = ip_vs_conn_hashkey_param(p, false);
306 352
307 ct_read_lock(hash); 353 ct_read_lock(hash);
308 354
309 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
310 if (cp->af == af && 356 if (p->pe_data && p->pe->ct_match) {
311 ip_vs_addr_equal(af, s_addr, &cp->caddr) && 357 if (p->pe->ct_match(p, cp))
358 goto out;
359 continue;
360 }
361
362 if (cp->af == p->af &&
363 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
312 /* protocol should only be IPPROTO_IP if 364 /* protocol should only be IPPROTO_IP if
313 * d_addr is a fwmark */ 365 * p->vaddr is a fwmark */
314 ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af, 366 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
315 d_addr, &cp->vaddr) && 367 p->af, p->vaddr, &cp->vaddr) &&
316 s_port == cp->cport && d_port == cp->vport && 368 p->cport == cp->cport && p->vport == cp->vport &&
317 cp->flags & IP_VS_CONN_F_TEMPLATE && 369 cp->flags & IP_VS_CONN_F_TEMPLATE &&
318 protocol == cp->protocol) { 370 p->protocol == cp->protocol)
319 /* HIT */
320 atomic_inc(&cp->refcnt);
321 goto out; 371 goto out;
322 }
323 } 372 }
324 cp = NULL; 373 cp = NULL;
325 374
326 out: 375 out:
376 if (cp)
377 atomic_inc(&cp->refcnt);
327 ct_read_unlock(hash); 378 ct_read_unlock(hash);
328 379
329 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", 380 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
330 ip_vs_proto_name(protocol), 381 ip_vs_proto_name(p->protocol),
331 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 382 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
332 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 383 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
333 cp ? "hit" : "not hit"); 384 cp ? "hit" : "not hit");
334 385
335 return cp; 386 return cp;
336} 387}
337 388
338/* 389/* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
339 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 390 * Called for pkts coming from inside-to-OUTside.
340 * Called for pkts coming from inside-to-OUTside. 391 * p->caddr, p->cport: pkt source address (inside host)
341 * s_addr, s_port: pkt source address (inside host) 392 * p->vaddr, p->vport: pkt dest address (foreign host) */
342 * d_addr, d_port: pkt dest address (foreign host) 393struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
343 */
344struct ip_vs_conn *ip_vs_conn_out_get
345(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
346 const union nf_inet_addr *d_addr, __be16 d_port)
347{ 394{
348 unsigned hash; 395 unsigned hash;
349 struct ip_vs_conn *cp, *ret=NULL; 396 struct ip_vs_conn *cp, *ret=NULL;
@@ -351,16 +398,16 @@ struct ip_vs_conn *ip_vs_conn_out_get
351 /* 398 /*
352 * Check for "full" addressed entries 399 * Check for "full" addressed entries
353 */ 400 */
354 hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); 401 hash = ip_vs_conn_hashkey_param(p, true);
355 402
356 ct_read_lock(hash); 403 ct_read_lock(hash);
357 404
358 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
359 if (cp->af == af && 406 if (cp->af == p->af &&
360 ip_vs_addr_equal(af, d_addr, &cp->caddr) && 407 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
361 ip_vs_addr_equal(af, s_addr, &cp->daddr) && 408 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
362 d_port == cp->cport && s_port == cp->dport && 409 p->vport == cp->cport && p->cport == cp->dport &&
363 protocol == cp->protocol) { 410 p->protocol == cp->protocol) {
364 /* HIT */ 411 /* HIT */
365 atomic_inc(&cp->refcnt); 412 atomic_inc(&cp->refcnt);
366 ret = cp; 413 ret = cp;
@@ -371,9 +418,9 @@ struct ip_vs_conn *ip_vs_conn_out_get
371 ct_read_unlock(hash); 418 ct_read_unlock(hash);
372 419
373 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", 420 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
374 ip_vs_proto_name(protocol), 421 ip_vs_proto_name(p->protocol),
375 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 422 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
376 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 423 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
377 ret ? "hit" : "not hit"); 424 ret ? "hit" : "not hit");
378 425
379 return ret; 426 return ret;
@@ -385,20 +432,12 @@ ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
385 const struct ip_vs_iphdr *iph, 432 const struct ip_vs_iphdr *iph,
386 unsigned int proto_off, int inverse) 433 unsigned int proto_off, int inverse)
387{ 434{
388 __be16 _ports[2], *pptr; 435 struct ip_vs_conn_param p;
389 436
390 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 437 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
391 if (pptr == NULL)
392 return NULL; 438 return NULL;
393 439
394 if (likely(!inverse)) 440 return ip_vs_conn_out_get(&p);
395 return ip_vs_conn_out_get(af, iph->protocol,
396 &iph->saddr, pptr[0],
397 &iph->daddr, pptr[1]);
398 else
399 return ip_vs_conn_out_get(af, iph->protocol,
400 &iph->daddr, pptr[1],
401 &iph->saddr, pptr[0]);
402} 441}
403EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); 442EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
404 443
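
ip_vs_conn_in_get() above now retries a missed lookup with a stack copy of the parameter block whose cport is zeroed, matching "no client port" entries without duplicating the long argument list. The retry-with-modified-copy pattern in isolation:

    #include <stdint.h>
    #include <stdio.h>

    struct conn_param { uint32_t caddr; uint16_t cport; };

    /* Pretend table: one entry registered with cport == 0 (wildcard). */
    static int lookup(const struct conn_param *p)
    {
        return p->caddr == 0x0a000001 && p->cport == 0;
    }

    /* Try the exact tuple first; on miss, retry a local copy with the
     * client port zeroed -- the caller's params stay untouched. */
    static int conn_in_get(const struct conn_param *p)
    {
        int hit = lookup(p);

        if (!hit) {
            struct conn_param cport_zero_p = *p;

            cport_zero_p.cport = 0;
            hit = lookup(&cport_zero_p);
        }
        return hit;
    }

    int main(void)
    {
        struct conn_param p = { 0x0a000001, 40000 };

        printf("%s\n", conn_in_get(&p) ? "hit" : "miss");
        return 0;
    }
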
@@ -505,6 +544,8 @@ static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
505static inline void 544static inline void
506ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) 545ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
507{ 546{
547 unsigned int conn_flags;
548
508 /* if dest is NULL, then return directly */ 549 /* if dest is NULL, then return directly */
509 if (!dest) 550 if (!dest)
510 return; 551 return;
@@ -512,16 +553,20 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
512 /* Increase the refcnt counter of the dest */ 553 /* Increase the refcnt counter of the dest */
513 atomic_inc(&dest->refcnt); 554 atomic_inc(&dest->refcnt);
514 555
556 conn_flags = atomic_read(&dest->conn_flags);
557 if (cp->protocol != IPPROTO_UDP)
558 conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
515 /* Bind with the destination and its corresponding transmitter */ 559 /* Bind with the destination and its corresponding transmitter */
516 if ((cp->flags & IP_VS_CONN_F_SYNC) && 560 if (cp->flags & IP_VS_CONN_F_SYNC) {
517 (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
518 /* if the connection is not template and is created 561 /* if the connection is not template and is created
519 * by sync, preserve the activity flag. 562 * by sync, preserve the activity flag.
520 */ 563 */
521 cp->flags |= atomic_read(&dest->conn_flags) & 564 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
522 (~IP_VS_CONN_F_INACTIVE); 565 conn_flags &= ~IP_VS_CONN_F_INACTIVE;
523 else 566 /* connections inherit forwarding method from dest */
524 cp->flags |= atomic_read(&dest->conn_flags); 567 cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
568 }
569 cp->flags |= conn_flags;
525 cp->dest = dest; 570 cp->dest = dest;
526 571
527 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " 572 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
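
ip_vs_bind_dest() now snapshots dest->conn_flags once, strips IP_VS_CONN_F_ONE_PACKET for anything that is not UDP, and for synced non-template connections preserves the activity flag while forcing the forwarding method to come from the local destination. A sketch of that flag arithmetic, with illustrative bit values (the real ones live in ip_vs.h):

    #include <stdint.h>
    #include <stdio.h>

    /* Flag layout chosen for the sketch, not the kernel's values. */
    #define F_FWD_MASK    0x0007u   /* forwarding method bits */
    #define F_SYNC        0x0020u   /* created by state sync */
    #define F_TEMPLATE    0x0100u
    #define F_ONE_PACKET  0x0400u   /* UDP one-packet scheduling */
    #define F_INACTIVE    0x0800u

    #define PROTO_UDP 17

    static uint32_t bind_dest_flags(uint32_t cp_flags,
                                    uint32_t dest_flags, int protocol)
    {
        uint32_t conn_flags = dest_flags;

        if (protocol != PROTO_UDP)
            conn_flags &= ~F_ONE_PACKET;    /* one-packet is UDP-only */

        if (cp_flags & F_SYNC) {
            if (!(cp_flags & F_TEMPLATE))
                conn_flags &= ~F_INACTIVE;  /* keep synced activity */
            cp_flags &= ~F_FWD_MASK;        /* inherit fwd method */
        }
        return cp_flags | conn_flags;
    }

    int main(void)
    {
        /* Synced TCP conn binding to a local dest with ONE_PACKET set:
         * ONE_PACKET is stripped, fwd method comes from the dest. */
        uint32_t f = bind_dest_flags(F_SYNC | 0x2,
                                     0x1 | F_ONE_PACKET, 6);

        printf("flags 0x%04x\n", (unsigned)f);
        return 0;
    }
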
@@ -717,6 +762,10 @@ static void ip_vs_conn_expire(unsigned long data)
717 if (cp->control) 762 if (cp->control)
718 ip_vs_control_del(cp); 763 ip_vs_control_del(cp);
719 764
765 if (cp->flags & IP_VS_CONN_F_NFCT)
766 ip_vs_conn_drop_conntrack(cp);
767
768 kfree(cp->pe_data);
720 if (unlikely(cp->app != NULL)) 769 if (unlikely(cp->app != NULL))
721 ip_vs_unbind_app(cp); 770 ip_vs_unbind_app(cp);
722 ip_vs_unbind_dest(cp); 771 ip_vs_unbind_dest(cp);
@@ -751,13 +800,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
751 * Create a new connection entry and hash it into the ip_vs_conn_tab 800 * Create a new connection entry and hash it into the ip_vs_conn_tab
752 */ 801 */
753struct ip_vs_conn * 802struct ip_vs_conn *
754ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, 803ip_vs_conn_new(const struct ip_vs_conn_param *p,
755 const union nf_inet_addr *vaddr, __be16 vport,
756 const union nf_inet_addr *daddr, __be16 dport, unsigned flags, 804 const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
757 struct ip_vs_dest *dest) 805 struct ip_vs_dest *dest)
758{ 806{
759 struct ip_vs_conn *cp; 807 struct ip_vs_conn *cp;
760 struct ip_vs_protocol *pp = ip_vs_proto_get(proto); 808 struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
761 809
762 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); 810 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
763 if (cp == NULL) { 811 if (cp == NULL) {
@@ -767,17 +815,21 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
767 815
768 INIT_LIST_HEAD(&cp->c_list); 816 INIT_LIST_HEAD(&cp->c_list);
769 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); 817 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
770 cp->af = af; 818 cp->af = p->af;
771 cp->protocol = proto; 819 cp->protocol = p->protocol;
772 ip_vs_addr_copy(af, &cp->caddr, caddr); 820 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
773 cp->cport = cport; 821 cp->cport = p->cport;
774 ip_vs_addr_copy(af, &cp->vaddr, vaddr); 822 ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
775 cp->vport = vport; 823 cp->vport = p->vport;
776 /* proto should only be IPPROTO_IP if d_addr is a fwmark */ 824 /* proto should only be IPPROTO_IP if d_addr is a fwmark */
777 ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af, 825 ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
778 &cp->daddr, daddr); 826 &cp->daddr, daddr);
779 cp->dport = dport; 827 cp->dport = dport;
780 cp->flags = flags; 828 cp->flags = flags;
829 if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
830 cp->pe_data = p->pe_data;
831 cp->pe_data_len = p->pe_data_len;
832 }
781 spin_lock_init(&cp->lock); 833 spin_lock_init(&cp->lock);
782 834
783 /* 835 /*
@@ -803,7 +855,7 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
803 855
804 /* Bind its packet transmitter */ 856 /* Bind its packet transmitter */
805#ifdef CONFIG_IP_VS_IPV6 857#ifdef CONFIG_IP_VS_IPV6
806 if (af == AF_INET6) 858 if (p->af == AF_INET6)
807 ip_vs_bind_xmit_v6(cp); 859 ip_vs_bind_xmit_v6(cp);
808 else 860 else
809#endif 861#endif
@@ -812,13 +864,22 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
812 if (unlikely(pp && atomic_read(&pp->appcnt))) 864 if (unlikely(pp && atomic_read(&pp->appcnt)))
813 ip_vs_bind_app(cp, pp); 865 ip_vs_bind_app(cp, pp);
814 866
867 /*
868 * Allow conntrack to be preserved. By default, conntrack
869 * is created and destroyed for every packet.
870 * Sometimes keeping conntrack can be useful for
871 * IP_VS_CONN_F_ONE_PACKET too.
872 */
873
874 if (ip_vs_conntrack_enabled())
875 cp->flags |= IP_VS_CONN_F_NFCT;
876
815 /* Hash it in the ip_vs_conn_tab finally */ 877 /* Hash it in the ip_vs_conn_tab finally */
816 ip_vs_conn_hash(cp); 878 ip_vs_conn_hash(cp);
817 879
818 return cp; 880 return cp;
819} 881}
820 882
821
822/* 883/*
823 * /proc/net/ip_vs_conn entries 884 * /proc/net/ip_vs_conn entries
824 */ 885 */
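
ip_vs_conn_new() now takes ownership of p->pe_data for templates: the pointer is stored in the connection and freed when the connection expires, so callers free it themselves only on failure paths (as ip_vs_sched_persist() does further down). The ownership-transfer convention in miniature:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct conn {
        char *pe_data;      /* owned by the connection once created */
        size_t pe_data_len;
    };

    /* On success the connection owns pe_data and frees it when it
     * expires; on failure ownership stays with the caller. */
    static struct conn *conn_new(char *pe_data, size_t len)
    {
        struct conn *cp = calloc(1, sizeof(*cp));

        if (!cp)
            return NULL;            /* caller still owns pe_data */
        cp->pe_data = pe_data;
        cp->pe_data_len = len;
        return cp;
    }

    static void conn_expire(struct conn *cp)
    {
        free(cp->pe_data);          /* template data dies with it */
        free(cp);
    }

    int main(void)
    {
        char *pe_data = strdup("call-id-1234");
        struct conn *cp = conn_new(pe_data, strlen(pe_data));

        if (!cp) {
            free(pe_data);          /* failure path: caller cleans up */
            return 1;
        }
        conn_expire(cp);
        return 0;
    }
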
@@ -834,7 +895,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
834 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 895 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
835 if (pos-- == 0) { 896 if (pos-- == 0) {
836 seq->private = &ip_vs_conn_tab[idx]; 897 seq->private = &ip_vs_conn_tab[idx];
837 return cp; 898 return cp;
838 } 899 }
839 } 900 }
840 ct_read_unlock_bh(idx); 901 ct_read_unlock_bh(idx);
@@ -891,30 +952,45 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
891 952
892 if (v == SEQ_START_TOKEN) 953 if (v == SEQ_START_TOKEN)
893 seq_puts(seq, 954 seq_puts(seq,
894 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires\n"); 955 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
895 else { 956 else {
896 const struct ip_vs_conn *cp = v; 957 const struct ip_vs_conn *cp = v;
958 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
959 size_t len = 0;
960
961 if (cp->dest && cp->pe_data &&
962 cp->dest->svc->pe->show_pe_data) {
963 pe_data[0] = ' ';
964 len = strlen(cp->dest->svc->pe->name);
965 memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
966 pe_data[len + 1] = ' ';
967 len += 2;
968 len += cp->dest->svc->pe->show_pe_data(cp,
969 pe_data + len);
970 }
971 pe_data[len] = '\0';
897 972
898#ifdef CONFIG_IP_VS_IPV6 973#ifdef CONFIG_IP_VS_IPV6
899 if (cp->af == AF_INET6) 974 if (cp->af == AF_INET6)
900 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n", 975 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
976 "%pI6 %04X %-11s %7lu%s\n",
901 ip_vs_proto_name(cp->protocol), 977 ip_vs_proto_name(cp->protocol),
902 &cp->caddr.in6, ntohs(cp->cport), 978 &cp->caddr.in6, ntohs(cp->cport),
903 &cp->vaddr.in6, ntohs(cp->vport), 979 &cp->vaddr.in6, ntohs(cp->vport),
904 &cp->daddr.in6, ntohs(cp->dport), 980 &cp->daddr.in6, ntohs(cp->dport),
905 ip_vs_state_name(cp->protocol, cp->state), 981 ip_vs_state_name(cp->protocol, cp->state),
906 (cp->timer.expires-jiffies)/HZ); 982 (cp->timer.expires-jiffies)/HZ, pe_data);
907 else 983 else
908#endif 984#endif
909 seq_printf(seq, 985 seq_printf(seq,
910 "%-3s %08X %04X %08X %04X" 986 "%-3s %08X %04X %08X %04X"
911 " %08X %04X %-11s %7lu\n", 987 " %08X %04X %-11s %7lu%s\n",
912 ip_vs_proto_name(cp->protocol), 988 ip_vs_proto_name(cp->protocol),
913 ntohl(cp->caddr.ip), ntohs(cp->cport), 989 ntohl(cp->caddr.ip), ntohs(cp->cport),
914 ntohl(cp->vaddr.ip), ntohs(cp->vport), 990 ntohl(cp->vaddr.ip), ntohs(cp->vport),
915 ntohl(cp->daddr.ip), ntohs(cp->dport), 991 ntohl(cp->daddr.ip), ntohs(cp->dport),
916 ip_vs_state_name(cp->protocol, cp->state), 992 ip_vs_state_name(cp->protocol, cp->state),
917 (cp->timer.expires-jiffies)/HZ); 993 (cp->timer.expires-jiffies)/HZ, pe_data);
918 } 994 }
919 return 0; 995 return 0;
920} 996}
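
The /proc/net/ip_vs_conn hunk appends an optional " <pe-name> <pe-data>" suffix, composed by hand with memcpy into a bounded buffer because the PE's show_pe_data() writes the data portion itself. For plain C strings the same composition can be sketched with snprintf; the length limits here are illustrative:

    #include <stdio.h>
    #include <string.h>

    #define PENAME_MAXLEN 16        /* illustrative bounds */
    #define PEDATA_MAXLEN 255

    /* Compose the optional " <name> <data>" suffix into a buffer
     * sized for the worst case (both fields, separators, NUL). */
    static void format_pe_suffix(char *buf, size_t buflen,
                                 const char *pe_name, const char *pe_data)
    {
        if (pe_name && pe_data)
            snprintf(buf, buflen, " %s %s", pe_name, pe_data);
        else
            buf[0] = '\0';          /* no PE: print nothing extra */
    }

    int main(void)
    {
        char suffix[PENAME_MAXLEN + PEDATA_MAXLEN + 3];

        format_pe_suffix(suffix, sizeof(suffix), "sip", "call-id-1234");
        printf("TCP 0A000001 3039 ... %7lu%s\n", 42ul, suffix);

        format_pe_suffix(suffix, sizeof(suffix), NULL, NULL);
        printf("TCP 0A000001 3039 ... %7lu%s\n", 42ul, suffix);
        return 0;
    }
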
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4c2f89df5cce..b4e51e9c5a04 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -40,6 +40,7 @@
40#include <net/udp.h> 40#include <net/udp.h>
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h>
43 44
44#include <linux/netfilter.h> 45#include <linux/netfilter.h>
45#include <linux/netfilter_ipv4.h> 46#include <linux/netfilter_ipv4.h>
@@ -47,6 +48,7 @@
47#ifdef CONFIG_IP_VS_IPV6 48#ifdef CONFIG_IP_VS_IPV6
48#include <net/ipv6.h> 49#include <net/ipv6.h>
49#include <linux/netfilter_ipv6.h> 50#include <linux/netfilter_ipv6.h>
51#include <net/ip6_route.h>
50#endif 52#endif
51 53
52#include <net/ip_vs.h> 54#include <net/ip_vs.h>
@@ -175,6 +177,18 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
175 return pp->state_transition(cp, direction, skb, pp); 177 return pp->state_transition(cp, direction, skb, pp);
176} 178}
177 179
180static inline void
181ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
182 struct sk_buff *skb, int protocol,
183 const union nf_inet_addr *caddr, __be16 cport,
184 const union nf_inet_addr *vaddr, __be16 vport,
185 struct ip_vs_conn_param *p)
186{
187 ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
188 p->pe = svc->pe;
189 if (p->pe && p->pe->fill_param)
190 p->pe->fill_param(p, skb);
191}
178 192
179/* 193/*
180 * IPVS persistent scheduling function 194 * IPVS persistent scheduling function
@@ -185,15 +199,16 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
185 */ 199 */
186static struct ip_vs_conn * 200static struct ip_vs_conn *
187ip_vs_sched_persist(struct ip_vs_service *svc, 201ip_vs_sched_persist(struct ip_vs_service *svc,
188 const struct sk_buff *skb, 202 struct sk_buff *skb,
189 __be16 ports[2]) 203 __be16 ports[2])
190{ 204{
191 struct ip_vs_conn *cp = NULL; 205 struct ip_vs_conn *cp = NULL;
192 struct ip_vs_iphdr iph; 206 struct ip_vs_iphdr iph;
193 struct ip_vs_dest *dest; 207 struct ip_vs_dest *dest;
194 struct ip_vs_conn *ct; 208 struct ip_vs_conn *ct;
195 __be16 dport; /* destination port to forward */ 209 __be16 dport = 0; /* destination port to forward */
196 __be16 flags; 210 unsigned int flags;
211 struct ip_vs_conn_param param;
197 union nf_inet_addr snet; /* source network of the client, 212 union nf_inet_addr snet; /* source network of the client,
198 after masking */ 213 after masking */
199 214
@@ -226,120 +241,75 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
226 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> 241 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
227 * is created for other persistent services. 242 * is created for other persistent services.
228 */ 243 */
229 if (ports[1] == svc->port) { 244 {
230 /* Check if a template already exists */ 245 int protocol = iph.protocol;
231 if (svc->port != FTPPORT) 246 const union nf_inet_addr *vaddr = &iph.daddr;
232 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, 247 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
233 &iph.daddr, ports[1]); 248 __be16 vport = 0;
234 else 249
235 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, 250 if (ports[1] == svc->port) {
236 &iph.daddr, 0); 251 /* non-FTP template:
237 252 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
238 if (!ct || !ip_vs_check_template(ct)) { 253 * FTP template:
239 /* 254 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
240 * No template found or the dest of the connection
241 * template is not available.
242 */
243 dest = svc->scheduler->schedule(svc, skb);
244 if (dest == NULL) {
245 IP_VS_DBG(1, "p-schedule: no dest found.\n");
246 return NULL;
247 }
248
249 /*
250 * Create a template like <protocol,caddr,0,
251 * vaddr,vport,daddr,dport> for non-ftp service,
252 * and <protocol,caddr,0,vaddr,0,daddr,0>
253 * for ftp service.
254 */ 255 */
255 if (svc->port != FTPPORT) 256 if (svc->port != FTPPORT)
256 ct = ip_vs_conn_new(svc->af, iph.protocol, 257 vport = ports[1];
257 &snet, 0,
258 &iph.daddr,
259 ports[1],
260 &dest->addr, dest->port,
261 IP_VS_CONN_F_TEMPLATE,
262 dest);
263 else
264 ct = ip_vs_conn_new(svc->af, iph.protocol,
265 &snet, 0,
266 &iph.daddr, 0,
267 &dest->addr, 0,
268 IP_VS_CONN_F_TEMPLATE,
269 dest);
270 if (ct == NULL)
271 return NULL;
272
273 ct->timeout = svc->timeout;
274 } else { 258 } else {
275 /* set destination with the found template */ 259 /* Note: persistent fwmark-based services and
276 dest = ct->dest; 260 * persistent port zero service are handled here.
277 } 261 * fwmark template:
278 dport = dest->port; 262 * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
279 } else { 263 * port zero template:
280 /* 264 * <protocol,caddr,0,vaddr,0,daddr,0>
281 * Note: persistent fwmark-based services and persistent
282 * port zero service are handled here.
283 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
284 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
285 */
286 if (svc->fwmark) {
287 union nf_inet_addr fwmark = {
288 .ip = htonl(svc->fwmark)
289 };
290
291 ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
292 &fwmark, 0);
293 } else
294 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
295 &iph.daddr, 0);
296
297 if (!ct || !ip_vs_check_template(ct)) {
298 /*
299 * If it is not persistent port zero, return NULL,
300 * otherwise create a connection template.
301 */ 265 */
302 if (svc->port) 266 if (svc->fwmark) {
303 return NULL; 267 protocol = IPPROTO_IP;
304 268 vaddr = &fwmark;
305 dest = svc->scheduler->schedule(svc, skb);
306 if (dest == NULL) {
307 IP_VS_DBG(1, "p-schedule: no dest found.\n");
308 return NULL;
309 } 269 }
270 }
271 ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
272 vaddr, vport, &param);
273 }
310 274
311 /* 275 /* Check if a template already exists */
312 * Create a template according to the service 276 ct = ip_vs_ct_in_get(&param);
313 */ 277 if (!ct || !ip_vs_check_template(ct)) {
314 if (svc->fwmark) { 278 /* No template found or the dest of the connection
315 union nf_inet_addr fwmark = { 279 * template is not available.
316 .ip = htonl(svc->fwmark) 280 */
317 }; 281 dest = svc->scheduler->schedule(svc, skb);
318 282 if (!dest) {
319 ct = ip_vs_conn_new(svc->af, IPPROTO_IP, 283 IP_VS_DBG(1, "p-schedule: no dest found.\n");
320 &snet, 0, 284 kfree(param.pe_data);
321 &fwmark, 0, 285 return NULL;
322 &dest->addr, 0,
323 IP_VS_CONN_F_TEMPLATE,
324 dest);
325 } else
326 ct = ip_vs_conn_new(svc->af, iph.protocol,
327 &snet, 0,
328 &iph.daddr, 0,
329 &dest->addr, 0,
330 IP_VS_CONN_F_TEMPLATE,
331 dest);
332 if (ct == NULL)
333 return NULL;
334
335 ct->timeout = svc->timeout;
336 } else {
337 /* set destination with the found template */
338 dest = ct->dest;
339 } 286 }
340 dport = ports[1]; 287
288 if (ports[1] == svc->port && svc->port != FTPPORT)
289 dport = dest->port;
290
291 /* Create a template
292 * This adds param.pe_data to the template,
293 * and thus param.pe_data will be destroyed
294 * when the template expires */
295 ct = ip_vs_conn_new(&param, &dest->addr, dport,
296 IP_VS_CONN_F_TEMPLATE, dest);
297 if (ct == NULL) {
298 kfree(param.pe_data);
299 return NULL;
300 }
301
302 ct->timeout = svc->timeout;
303 } else {
304 /* set destination with the found template */
305 dest = ct->dest;
306 kfree(param.pe_data);
341 } 307 }
342 308
309 dport = ports[1];
310 if (dport == svc->port && dest->port)
311 dport = dest->port;
312
343 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 313 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
344 && iph.protocol == IPPROTO_UDP)? 314 && iph.protocol == IPPROTO_UDP)?
345 IP_VS_CONN_F_ONE_PACKET : 0; 315 IP_VS_CONN_F_ONE_PACKET : 0;
@@ -347,12 +317,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
347 /* 317 /*
348 * Create a new connection according to the template 318 * Create a new connection according to the template
349 */ 319 */
350 cp = ip_vs_conn_new(svc->af, iph.protocol, 320 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
351 &iph.saddr, ports[0], 321 &iph.daddr, ports[1], &param);
352 &iph.daddr, ports[1], 322 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
353 &dest->addr, dport,
354 flags,
355 dest);
356 if (cp == NULL) { 323 if (cp == NULL) {
357 ip_vs_conn_put(ct); 324 ip_vs_conn_put(ct);
358 return NULL; 325 return NULL;
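
The rewritten ip_vs_sched_persist() folds the four template shapes into a single parameter fill: same-port non-FTP services keep the virtual port, FTP and port-zero services zero it, and fwmark services substitute IPPROTO_IP plus the fwmark (stored network-order in the address union) for the virtual address. A compact restatement of that selection logic, with hypothetical stand-in macros:

    #include <arpa/inet.h>          /* htons, htonl, ntohs */
    #include <stdint.h>
    #include <stdio.h>

    #define PROTO_IP 0              /* stand-ins for kernel macros */
    #define FTPPORT  htons(21)

    union inet_addr { uint32_t ip; uint32_t all[4]; };

    struct conn_param {
        int protocol;
        union inet_addr caddr, vaddr;
        uint16_t cport, vport;
    };

    /* Template-parameter selection as in the hunk above:
     *   dport == svc port, non-FTP: keep the virtual port
     *   FTP or port-zero service:   vport = 0
     *   fwmark service:             PROTO_IP + fwmark as vaddr */
    static void fill_template_param(struct conn_param *p, int pkt_proto,
                                    uint32_t pkt_daddr, uint16_t pkt_dport,
                                    uint32_t snet, uint16_t svc_port,
                                    uint32_t svc_fwmark)
    {
        union inet_addr fwmark = { .ip = htonl(svc_fwmark) };

        p->protocol = pkt_proto;
        p->caddr.ip = snet;
        p->cport = 0;               /* templates never key on cport */
        p->vaddr.ip = pkt_daddr;
        p->vport = 0;

        if (pkt_dport == svc_port) {
            if (svc_port != FTPPORT)
                p->vport = pkt_dport;
        } else if (svc_fwmark) {
            p->protocol = PROTO_IP;
            p->vaddr = fwmark;      /* the mark stands in for vaddr */
        }
    }

    int main(void)
    {
        struct conn_param p;

        fill_template_param(&p, 6, htonl(0x0a0000fe), htons(80),
                            htonl(0x0a000000), htons(80), 0);
        printf("proto %d vport %u\n", p.protocol,
               (unsigned)ntohs(p.vport));
        return 0;
    }
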
@@ -376,23 +343,53 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
376 * Protocols supported: TCP, UDP 343 * Protocols supported: TCP, UDP
377 */ 344 */
378struct ip_vs_conn * 345struct ip_vs_conn *
379ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 346ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
347 struct ip_vs_protocol *pp, int *ignored)
380{ 348{
381 struct ip_vs_conn *cp = NULL; 349 struct ip_vs_conn *cp = NULL;
382 struct ip_vs_iphdr iph; 350 struct ip_vs_iphdr iph;
383 struct ip_vs_dest *dest; 351 struct ip_vs_dest *dest;
384 __be16 _ports[2], *pptr, flags; 352 __be16 _ports[2], *pptr;
353 unsigned int flags;
385 354
355 *ignored = 1;
386 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 356 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
387 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 357 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
388 if (pptr == NULL) 358 if (pptr == NULL)
389 return NULL; 359 return NULL;
390 360
391 /* 361 /*
362 * FTPDATA needs this check when using local real server.
363 * Never schedule Active FTPDATA connections from real server.
364 * For LVS-NAT they must be already created. For other methods
365 * with persistence the connection is created on SYN+ACK.
366 */
367 if (pptr[0] == FTPDATA) {
368 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
369 "Not scheduling FTPDATA");
370 return NULL;
371 }
372
373 /*
374 * Do not schedule replies from local real server. It is risky
375 * for fwmark services but mostly for persistent services.
376 */
377 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
378 (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
379 (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
380 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
381 "Not scheduling reply for existing connection");
382 __ip_vs_conn_put(cp);
383 return NULL;
384 }
385
386 /*
392 * Persistent service 387 * Persistent service
393 */ 388 */
394 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 389 if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
390 *ignored = 0;
395 return ip_vs_sched_persist(svc, skb, pptr); 391 return ip_vs_sched_persist(svc, skb, pptr);
392 }
396 393
397 /* 394 /*
398 * Non-persistent service 395 * Non-persistent service
@@ -405,6 +402,8 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
405 return NULL; 402 return NULL;
406 } 403 }
407 404
405 *ignored = 0;
406
408 dest = svc->scheduler->schedule(svc, skb); 407 dest = svc->scheduler->schedule(svc, skb);
409 if (dest == NULL) { 408 if (dest == NULL) {
410 IP_VS_DBG(1, "Schedule: no dest found.\n"); 409 IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -418,14 +417,16 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
418 /* 417 /*
419 * Create a connection entry. 418 * Create a connection entry.
420 */ 419 */
421 cp = ip_vs_conn_new(svc->af, iph.protocol, 420 {
422 &iph.saddr, pptr[0], 421 struct ip_vs_conn_param p;
423 &iph.daddr, pptr[1], 422 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
424 &dest->addr, dest->port ? dest->port : pptr[1], 423 pptr[0], &iph.daddr, pptr[1], &p);
425 flags, 424 cp = ip_vs_conn_new(&p, &dest->addr,
426 dest); 425 dest->port ? dest->port : pptr[1],
427 if (cp == NULL) 426 flags, dest);
428 return NULL; 427 if (!cp)
428 return NULL;
429 }
429 430
430 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " 431 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
431 "d:%s:%u conn->flags:%X conn->refcnt:%d\n", 432 "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
@@ -472,23 +473,26 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
472 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
473 int ret, cs; 474 int ret, cs;
474 struct ip_vs_conn *cp; 475 struct ip_vs_conn *cp;
475 __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 476 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
476 iph.protocol == IPPROTO_UDP)? 477 iph.protocol == IPPROTO_UDP)?
477 IP_VS_CONN_F_ONE_PACKET : 0; 478 IP_VS_CONN_F_ONE_PACKET : 0;
478 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 479 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
479 480
480 ip_vs_service_put(svc); 481 ip_vs_service_put(svc);
481 482
482 /* create a new connection entry */ 483 /* create a new connection entry */
483 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
484 cp = ip_vs_conn_new(svc->af, iph.protocol, 485 {
485 &iph.saddr, pptr[0], 486 struct ip_vs_conn_param p;
486 &iph.daddr, pptr[1], 487 ip_vs_conn_fill_param(svc->af, iph.protocol,
487 &daddr, 0, 488 &iph.saddr, pptr[0],
488 IP_VS_CONN_F_BYPASS | flags, 489 &iph.daddr, pptr[1], &p);
489 NULL); 490 cp = ip_vs_conn_new(&p, &daddr, 0,
490 if (cp == NULL) 491 IP_VS_CONN_F_BYPASS | flags,
491 return NF_DROP; 492 NULL);
493 if (!cp)
494 return NF_DROP;
495 }
492 496
493 /* statistics */ 497 /* statistics */
494 ip_vs_in_stats(cp, skb); 498 ip_vs_in_stats(cp, skb);
@@ -526,9 +530,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
526 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ 530 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
527 */ 531 */
528#ifdef CONFIG_IP_VS_IPV6 532#ifdef CONFIG_IP_VS_IPV6
529 if (svc->af == AF_INET6) 533 if (svc->af == AF_INET6) {
534 if (!skb->dev) {
535 struct net *net = dev_net(skb_dst(skb)->dev);
536
537 skb->dev = net->loopback_dev;
538 }
530 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); 539 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
531 else 540 } else
532#endif 541#endif
533 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 542 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
534 543
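icmpv6_send() derives struct net from skb->dev, which may still be unset when the reply is generated before the skb has been assigned an input device; the guard above falls back to the namespace's loopback device via the attached dst. The same pattern reappears in ip_vs_out() below; a hypothetical helper (not in the patch) that both sites could share:

    /* Hypothetical consolidation of the skb->dev fallback. */
    static inline void ip_vs_skb_dev_fallback(struct sk_buff *skb)
    {
            if (!skb->dev) {
                    struct net *net = dev_net(skb_dst(skb)->dev);

                    skb->dev = net->loopback_dev;
            }
    }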
@@ -540,6 +549,15 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
540 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 549 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
541} 550}
542 551
552static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
553{
554 if (NF_INET_LOCAL_IN == hooknum)
555 return IP_DEFRAG_VS_IN;
556 if (NF_INET_FORWARD == hooknum)
557 return IP_DEFRAG_VS_FWD;
558 return IP_DEFRAG_VS_OUT;
559}
560
543static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) 561static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
544{ 562{
545 int err = ip_defrag(skb, user); 563 int err = ip_defrag(skb, user);
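ip_defrag() keys its reassembly queues by the user value, so giving each hook point its own IP_DEFRAG_VS_* constant keeps fragments collected in LOCAL_IN, FORWARD and LOCAL_OUT in separate queues; the new helper replaces the open-coded "hooknum == NF_INET_LOCAL_IN ? ..." ternaries further down. A call-site sketch, assuming this generation's int ip_defrag(struct sk_buff *skb, u32 user) signature:

    /* Sketch: reassemble in a hook-specific queue before lookup. */
    if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
            if (ip_defrag(skb, ip_vs_defrag_user(hooknum)))
                    return NF_STOLEN;       /* skb consumed by reassembly */
    }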
@@ -600,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
600 skb->ip_summed = CHECKSUM_UNNECESSARY; 618 skb->ip_summed = CHECKSUM_UNNECESSARY;
601 619
602 if (inout) 620 if (inout)
603 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 621 IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
604 "Forwarding altered outgoing ICMP"); 622 "Forwarding altered outgoing ICMP");
605 else 623 else
606 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 624 IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
607 "Forwarding altered incoming ICMP"); 625 "Forwarding altered incoming ICMP");
608} 626}
609 627
@@ -637,17 +655,21 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
637 } 655 }
638 656
639 /* And finally the ICMP checksum */ 657 /* And finally the ICMP checksum */
640 icmph->icmp6_cksum = 0; 658 icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
641 /* TODO IPv6: is this correct for ICMPv6? */ 659 skb->len - icmp_offset,
642 ip_vs_checksum_complete(skb, icmp_offset); 660 IPPROTO_ICMPV6, 0);
643 skb->ip_summed = CHECKSUM_UNNECESSARY; 661 skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
662 skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
663 skb->ip_summed = CHECKSUM_PARTIAL;
644 664
645 if (inout) 665 if (inout)
646 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 666 IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
647 "Forwarding altered outgoing ICMPv6"); 667 (void *)ciph - (void *)iph,
668 "Forwarding altered outgoing ICMPv6");
648 else 669 else
649 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 670 IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
650 "Forwarding altered incoming ICMPv6"); 671 (void *)ciph - (void *)iph,
672 "Forwarding altered incoming ICMPv6");
651} 673}
652#endif 674#endif
653 675
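The old code zeroed icmp6_cksum and ran a plain software checksum, which omits the IPv6 pseudo-header required by ICMPv6. The replacement stores only the pseudo-header sum (csum_ipv6_magic() with a zero payload sum) and sets csum_start/csum_offset so that later code, or hardware, completes it: CHECKSUM_PARTIAL. For reference, a correct software equivalent of the deferred work would be:

    /* Software equivalent of the deferred CHECKSUM_PARTIAL work. */
    icmph->icmp6_cksum = 0;
    icmph->icmp6_cksum = csum_ipv6_magic(&iph->saddr, &iph->daddr,
                                         skb->len - icmp_offset,
                                         IPPROTO_ICMPV6,
                                         skb_checksum(skb, icmp_offset,
                                                      skb->len - icmp_offset,
                                                      0));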
@@ -688,10 +710,25 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
688#endif 710#endif
689 ip_vs_nat_icmp(skb, pp, cp, 1); 711 ip_vs_nat_icmp(skb, pp, cp, 1);
690 712
713#ifdef CONFIG_IP_VS_IPV6
714 if (af == AF_INET6) {
715 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
716 goto out;
717 } else
718#endif
719 if ((sysctl_ip_vs_snat_reroute ||
720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
721 ip_route_me_harder(skb, RTN_LOCAL) != 0)
722 goto out;
723
691 /* do the statistics and put it back */ 724 /* do the statistics and put it back */
692 ip_vs_out_stats(cp, skb); 725 ip_vs_out_stats(cp, skb);
693 726
694 skb->ipvs_property = 1; 727 skb->ipvs_property = 1;
728 if (!(cp->flags & IP_VS_CONN_F_NFCT))
729 ip_vs_notrack(skb);
730 else
731 ip_vs_update_conntrack(skb, cp, 0);
695 verdict = NF_ACCEPT; 732 verdict = NF_ACCEPT;
696 733
697out: 734out:
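With the per-connection IP_VS_CONN_F_NFCT flag, a reply is either handed to conntrack with updated tuples (ip_vs_update_conntrack) or explicitly marked untracked so later conntrack hooks skip it. ip_vs_notrack() is not shown in this diff; a plausible shape, assuming this generation's untracked-conntrack API (an assumption, not quoted from the patch):

    /* Assumed sketch of ip_vs_notrack(): mark the skb untracked. */
    static inline void ip_vs_notrack(struct sk_buff *skb)
    {
    #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
            enum ip_conntrack_info ctinfo;
            struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

            if (!ct || !nf_ct_is_untracked(ct)) {
                    nf_reset(skb);  /* drop any existing ct reference */
                    skb->nfct = &nf_ct_untracked_get()->ct_general;
                    skb->nfctinfo = IP_CT_NEW;
                    nf_conntrack_get(skb->nfct);
            }
    #endif
    }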
@@ -705,7 +742,8 @@ out:
705 * Find any that might be relevant, check against existing connections. 742 * Find any that might be relevant, check against existing connections.
706 * Currently handles error types - unreachable, quench, ttl exceeded. 743 * Currently handles error types - unreachable, quench, ttl exceeded.
707 */ 744 */
708static int ip_vs_out_icmp(struct sk_buff *skb, int *related) 745static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
746 unsigned int hooknum)
709{ 747{
710 struct iphdr *iph; 748 struct iphdr *iph;
711 struct icmphdr _icmph, *ic; 749 struct icmphdr _icmph, *ic;
@@ -720,7 +758,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
720 758
721 /* reassemble IP fragments */ 759 /* reassemble IP fragments */
722 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 760 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
723 if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) 761 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
724 return NF_STOLEN; 762 return NF_STOLEN;
725 } 763 }
726 764
@@ -763,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
763 pp->dont_defrag)) 801 pp->dont_defrag))
764 return NF_ACCEPT; 802 return NF_ACCEPT;
765 803
766 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); 804 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
805 "Checking outgoing ICMP for");
767 806
768 offset += cih->ihl * 4; 807 offset += cih->ihl * 4;
769 808
@@ -779,7 +818,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
779} 818}
780 819
781#ifdef CONFIG_IP_VS_IPV6 820#ifdef CONFIG_IP_VS_IPV6
782static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) 821static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
822 unsigned int hooknum)
783{ 823{
784 struct ipv6hdr *iph; 824 struct ipv6hdr *iph;
785 struct icmp6hdr _icmph, *ic; 825 struct icmp6hdr _icmph, *ic;
@@ -795,7 +835,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
795 835
796 /* reassemble IP fragments */ 836 /* reassemble IP fragments */
797 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { 837 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
798 if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) 838 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
799 return NF_STOLEN; 839 return NF_STOLEN;
800 } 840 }
801 841
@@ -838,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
838 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) 878 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
839 return NF_ACCEPT; 879 return NF_ACCEPT;
840 880
841 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); 881 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
882 "Checking outgoing ICMPv6 for");
842 883
843 offset += sizeof(struct ipv6hdr); 884 offset += sizeof(struct ipv6hdr);
844 885
@@ -886,7 +927,7 @@ static unsigned int
886handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 927handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
887 struct ip_vs_conn *cp, int ihl) 928 struct ip_vs_conn *cp, int ihl)
888{ 929{
889 IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); 930 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
890 931
891 if (!skb_make_writable(skb, ihl)) 932 if (!skb_make_writable(skb, ihl))
892 goto drop; 933 goto drop;
@@ -905,6 +946,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
905 ip_send_check(ip_hdr(skb)); 946 ip_send_check(ip_hdr(skb));
906 } 947 }
907 948
949 /*
 950 * nf_iterate does not expect skb->dst->dev to change mid-chain.
 951 * Even so, enabling this code looks safe for hooks where our
 952 * handlers sit at the end of the chain and every later handler
 953 * uses skb->dst->dev rather than outdev.
 954 * It does ensure that in/out NAT traffic is routed properly
 955 * when multiple paths are in use.
956 */
957
908 /* For policy routing, packets originating from this 958 /* For policy routing, packets originating from this
909 * machine itself may be routed differently to packets 959 * machine itself may be routed differently to packets
910 * passing through. We want this packet to be routed as 960 * passing through. We want this packet to be routed as
@@ -913,21 +963,25 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
913 */ 963 */
914#ifdef CONFIG_IP_VS_IPV6 964#ifdef CONFIG_IP_VS_IPV6
915 if (af == AF_INET6) { 965 if (af == AF_INET6) {
916 if (ip6_route_me_harder(skb) != 0) 966 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
917 goto drop; 967 goto drop;
918 } else 968 } else
919#endif 969#endif
920 if (ip_route_me_harder(skb, RTN_LOCAL) != 0) 970 if ((sysctl_ip_vs_snat_reroute ||
971 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
972 ip_route_me_harder(skb, RTN_LOCAL) != 0)
921 goto drop; 973 goto drop;
922 974
923 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); 975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
924 976
925 ip_vs_out_stats(cp, skb); 977 ip_vs_out_stats(cp, skb);
926 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 978 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
927 ip_vs_update_conntrack(skb, cp, 0);
928 ip_vs_conn_put(cp);
929
930 skb->ipvs_property = 1; 979 skb->ipvs_property = 1;
980 if (!(cp->flags & IP_VS_CONN_F_NFCT))
981 ip_vs_notrack(skb);
982 else
983 ip_vs_update_conntrack(skb, cp, 0);
984 ip_vs_conn_put(cp);
931 985
932 LeaveFunction(11); 986 LeaveFunction(11);
933 return NF_ACCEPT; 987 return NF_ACCEPT;
@@ -935,35 +989,46 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
935drop: 989drop:
936 ip_vs_conn_put(cp); 990 ip_vs_conn_put(cp);
937 kfree_skb(skb); 991 kfree_skb(skb);
992 LeaveFunction(11);
938 return NF_STOLEN; 993 return NF_STOLEN;
939} 994}
940 995
941/* 996/*
942 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
943 * Check if outgoing packet belongs to the established ip_vs_conn. 997 * Check if outgoing packet belongs to the established ip_vs_conn.
944 */ 998 */
945static unsigned int 999static unsigned int
946ip_vs_out(unsigned int hooknum, struct sk_buff *skb, 1000ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
947 const struct net_device *in, const struct net_device *out,
948 int (*okfn)(struct sk_buff *))
949{ 1001{
950 struct ip_vs_iphdr iph; 1002 struct ip_vs_iphdr iph;
951 struct ip_vs_protocol *pp; 1003 struct ip_vs_protocol *pp;
952 struct ip_vs_conn *cp; 1004 struct ip_vs_conn *cp;
953 int af;
954 1005
955 EnterFunction(11); 1006 EnterFunction(11);
956 1007
957 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1008 /* Already marked as IPVS request or reply? */
958
959 if (skb->ipvs_property) 1009 if (skb->ipvs_property)
960 return NF_ACCEPT; 1010 return NF_ACCEPT;
961 1011
1012 /* Bad... Do not break raw sockets */
1013 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1014 af == AF_INET)) {
1015 struct sock *sk = skb->sk;
1016 struct inet_sock *inet = inet_sk(skb->sk);
1017
1018 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1019 return NF_ACCEPT;
1020 }
1021
1022 if (unlikely(!skb_dst(skb)))
1023 return NF_ACCEPT;
1024
962 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1025 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
963#ifdef CONFIG_IP_VS_IPV6 1026#ifdef CONFIG_IP_VS_IPV6
964 if (af == AF_INET6) { 1027 if (af == AF_INET6) {
965 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1028 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
966 int related, verdict = ip_vs_out_icmp_v6(skb, &related); 1029 int related;
1030 int verdict = ip_vs_out_icmp_v6(skb, &related,
1031 hooknum);
967 1032
968 if (related) 1033 if (related)
969 return verdict; 1034 return verdict;
@@ -972,7 +1037,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
972 } else 1037 } else
973#endif 1038#endif
974 if (unlikely(iph.protocol == IPPROTO_ICMP)) { 1039 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
975 int related, verdict = ip_vs_out_icmp(skb, &related); 1040 int related;
1041 int verdict = ip_vs_out_icmp(skb, &related, hooknum);
976 1042
977 if (related) 1043 if (related)
978 return verdict; 1044 return verdict;
@@ -986,19 +1052,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
986 /* reassemble IP fragments */ 1052 /* reassemble IP fragments */
987#ifdef CONFIG_IP_VS_IPV6 1053#ifdef CONFIG_IP_VS_IPV6
988 if (af == AF_INET6) { 1054 if (af == AF_INET6) {
989 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1055 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
990 int related, verdict = ip_vs_out_icmp_v6(skb, &related); 1056 if (ip_vs_gather_frags_v6(skb,
991 1057 ip_vs_defrag_user(hooknum)))
992 if (related) 1058 return NF_STOLEN;
993 return verdict;
994
995 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
996 } 1059 }
1060
1061 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
997 } else 1062 } else
998#endif 1063#endif
999 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && 1064 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
1000 !pp->dont_defrag)) { 1065 !pp->dont_defrag)) {
1001 if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) 1066 if (ip_vs_gather_frags(skb,
1067 ip_vs_defrag_user(hooknum)))
1002 return NF_STOLEN; 1068 return NF_STOLEN;
1003 1069
1004 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1070 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -1009,55 +1075,123 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
1009 */ 1075 */
1010 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); 1076 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
1011 1077
1012 if (unlikely(!cp)) { 1078 if (likely(cp))
1013 if (sysctl_ip_vs_nat_icmp_send && 1079 return handle_response(af, skb, pp, cp, iph.len);
1014 (pp->protocol == IPPROTO_TCP || 1080 if (sysctl_ip_vs_nat_icmp_send &&
1015 pp->protocol == IPPROTO_UDP || 1081 (pp->protocol == IPPROTO_TCP ||
1016 pp->protocol == IPPROTO_SCTP)) { 1082 pp->protocol == IPPROTO_UDP ||
1017 __be16 _ports[2], *pptr; 1083 pp->protocol == IPPROTO_SCTP)) {
1018 1084 __be16 _ports[2], *pptr;
1019 pptr = skb_header_pointer(skb, iph.len, 1085
1020 sizeof(_ports), _ports); 1086 pptr = skb_header_pointer(skb, iph.len,
1021 if (pptr == NULL) 1087 sizeof(_ports), _ports);
1022 return NF_ACCEPT; /* Not for me */ 1088 if (pptr == NULL)
1023 if (ip_vs_lookup_real_service(af, iph.protocol, 1089 return NF_ACCEPT; /* Not for me */
1024 &iph.saddr, 1090 if (ip_vs_lookup_real_service(af, iph.protocol,
1025 pptr[0])) { 1091 &iph.saddr,
1026 /* 1092 pptr[0])) {
1027 * Notify the real server: there is no 1093 /*
1028 * existing entry if it is not RST 1094 * Notify the real server: there is no
1029 * packet or not TCP packet. 1095 * existing entry if it is not RST
1030 */ 1096 * packet or not TCP packet.
1031 if ((iph.protocol != IPPROTO_TCP && 1097 */
1032 iph.protocol != IPPROTO_SCTP) 1098 if ((iph.protocol != IPPROTO_TCP &&
1033 || ((iph.protocol == IPPROTO_TCP 1099 iph.protocol != IPPROTO_SCTP)
1034 && !is_tcp_reset(skb, iph.len)) 1100 || ((iph.protocol == IPPROTO_TCP
1035 || (iph.protocol == IPPROTO_SCTP 1101 && !is_tcp_reset(skb, iph.len))
1036 && !is_sctp_abort(skb, 1102 || (iph.protocol == IPPROTO_SCTP
1037 iph.len)))) { 1103 && !is_sctp_abort(skb,
1104 iph.len)))) {
1038#ifdef CONFIG_IP_VS_IPV6 1105#ifdef CONFIG_IP_VS_IPV6
1039 if (af == AF_INET6) 1106 if (af == AF_INET6) {
1040 icmpv6_send(skb, 1107 struct net *net =
1041 ICMPV6_DEST_UNREACH, 1108 dev_net(skb_dst(skb)->dev);
1042 ICMPV6_PORT_UNREACH, 1109
1043 0); 1110 if (!skb->dev)
1044 else 1111 skb->dev = net->loopback_dev;
1112 icmpv6_send(skb,
1113 ICMPV6_DEST_UNREACH,
1114 ICMPV6_PORT_UNREACH,
1115 0);
1116 } else
1045#endif 1117#endif
1046 icmp_send(skb, 1118 icmp_send(skb,
1047 ICMP_DEST_UNREACH, 1119 ICMP_DEST_UNREACH,
1048 ICMP_PORT_UNREACH, 0); 1120 ICMP_PORT_UNREACH, 0);
1049 return NF_DROP; 1121 return NF_DROP;
1050 }
1051 } 1122 }
1052 } 1123 }
1053 IP_VS_DBG_PKT(12, pp, skb, 0,
1054 "packet continues traversal as normal");
1055 return NF_ACCEPT;
1056 } 1124 }
1125 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1126 "ip_vs_out: packet continues traversal as normal");
1127 return NF_ACCEPT;
1128}
1129
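The raw-socket guard above honors the IP_NODEFRAG socket option (inet->nodefrag): locally originated IPv4 raw traffic that asked not to be defragmented now bypasses IPVS entirely instead of being reassembled by the LOCAL_OUT hook. A userspace fragment that sets the option (IP_NODEFRAG is 22 in linux/in.h since 2.6.36 and may be missing from older libc headers):

    #include <netinet/in.h>
    #include <sys/socket.h>

    #ifndef IP_NODEFRAG
    #define IP_NODEFRAG 22          /* from linux/in.h */
    #endif

    /* Userspace fragment: opt a raw socket out of defragmentation. */
    static int open_nodefrag_raw(void)
    {
            int one = 1;
            int fd = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);

            if (fd >= 0)
                    setsockopt(fd, IPPROTO_IP, IP_NODEFRAG,
                               &one, sizeof(one));
            return fd;
    }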
1130/*
1131 * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
1132 * used only for VS/NAT.
1133 * Check if packet is reply for established ip_vs_conn.
1134 */
1135static unsigned int
1136ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
1137 const struct net_device *in, const struct net_device *out,
1138 int (*okfn)(struct sk_buff *))
1139{
1140 return ip_vs_out(hooknum, skb, AF_INET);
1141}
1142
1143/*
1144 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1145 * Check if packet is reply for established ip_vs_conn.
1146 */
1147static unsigned int
1148ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
1149 const struct net_device *in, const struct net_device *out,
1150 int (*okfn)(struct sk_buff *))
1151{
1152 unsigned int verdict;
1153
1154 /* Disable BH in LOCAL_OUT until all places are fixed */
1155 local_bh_disable();
1156 verdict = ip_vs_out(hooknum, skb, AF_INET);
1157 local_bh_enable();
1158 return verdict;
1159}
1160
1161#ifdef CONFIG_IP_VS_IPV6
1162
1163/*
1164 * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
1165 * used only for VS/NAT.
1166 * Check if packet is reply for established ip_vs_conn.
1167 */
1168static unsigned int
1169ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
1170 const struct net_device *in, const struct net_device *out,
1171 int (*okfn)(struct sk_buff *))
1172{
1173 return ip_vs_out(hooknum, skb, AF_INET6);
1174}
1057 1175
1058 return handle_response(af, skb, pp, cp, iph.len); 1176/*
1177 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1178 * Check if packet is reply for established ip_vs_conn.
1179 */
1180static unsigned int
1181ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1182 const struct net_device *in, const struct net_device *out,
1183 int (*okfn)(struct sk_buff *))
1184{
1185 unsigned int verdict;
1186
1187 /* Disable BH in LOCAL_OUT until all places are fixed */
1188 local_bh_disable();
1189 verdict = ip_vs_out(hooknum, skb, AF_INET6);
1190 local_bh_enable();
1191 return verdict;
1059} 1192}
1060 1193
1194#endif
1061 1195
1062/* 1196/*
1063 * Handle ICMP messages in the outside-to-inside direction (incoming). 1197 * Handle ICMP messages in the outside-to-inside direction (incoming).
@@ -1081,8 +1215,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1081 1215
1082 /* reassemble IP fragments */ 1216 /* reassemble IP fragments */
1083 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 1217 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
1084 if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ? 1218 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
1085 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
1086 return NF_STOLEN; 1219 return NF_STOLEN;
1087 } 1220 }
1088 1221
@@ -1125,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1125 pp->dont_defrag)) 1258 pp->dont_defrag))
1126 return NF_ACCEPT; 1259 return NF_ACCEPT;
1127 1260
1128 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); 1261 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
1262 "Checking incoming ICMP for");
1129 1263
1130 offset += cih->ihl * 4; 1264 offset += cih->ihl * 4;
1131 1265
@@ -1159,7 +1293,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1159 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1293 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1160 offset += 2 * sizeof(__u16); 1294 offset += 2 * sizeof(__u16);
1161 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); 1295 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
1162 /* do not touch skb anymore */ 1296 /* LOCALNODE from FORWARD hook is not supported */
1297 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1298 skb_rtable(skb)->rt_flags & RTCF_LOCAL) {
1299 IP_VS_DBG(1, "%s(): "
1300 "local delivery to %pI4 but in FORWARD\n",
1301 __func__, &skb_rtable(skb)->rt_dst);
1302 verdict = NF_DROP;
1303 }
1163 1304
1164 out: 1305 out:
1165 __ip_vs_conn_put(cp); 1306 __ip_vs_conn_put(cp);
@@ -1180,14 +1321,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1180 struct ip_vs_protocol *pp; 1321 struct ip_vs_protocol *pp;
1181 unsigned int offset, verdict; 1322 unsigned int offset, verdict;
1182 union nf_inet_addr snet; 1323 union nf_inet_addr snet;
1324 struct rt6_info *rt;
1183 1325
1184 *related = 1; 1326 *related = 1;
1185 1327
1186 /* reassemble IP fragments */ 1328 /* reassemble IP fragments */
1187 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { 1329 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1188 if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? 1330 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1189 IP_DEFRAG_VS_IN :
1190 IP_DEFRAG_VS_FWD))
1191 return NF_STOLEN; 1331 return NF_STOLEN;
1192 } 1332 }
1193 1333
@@ -1230,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1230 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) 1370 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
1231 return NF_ACCEPT; 1371 return NF_ACCEPT;
1232 1372
1233 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); 1373 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
1374 "Checking incoming ICMPv6 for");
1234 1375
1235 offset += sizeof(struct ipv6hdr); 1376 offset += sizeof(struct ipv6hdr);
1236 1377
@@ -1258,7 +1399,15 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1258 IPPROTO_SCTP == cih->nexthdr) 1399 IPPROTO_SCTP == cih->nexthdr)
1259 offset += 2 * sizeof(__u16); 1400 offset += 2 * sizeof(__u16);
1260 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); 1401 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
1261 /* do not touch skb anymore */ 1402 /* LOCALNODE from FORWARD hook is not supported */
1403 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1404 (rt = (struct rt6_info *) skb_dst(skb)) &&
1405 rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) {
1406 IP_VS_DBG(1, "%s(): "
1407 "local delivery to %pI6 but in FORWARD\n",
1408 __func__, &rt->rt6i_dst);
1409 verdict = NF_DROP;
1410 }
1262 1411
1263 __ip_vs_conn_put(cp); 1412 __ip_vs_conn_put(cp);
1264 1413
@@ -1272,35 +1421,49 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1272 * and send it on its way... 1421 * and send it on its way...
1273 */ 1422 */
1274static unsigned int 1423static unsigned int
1275ip_vs_in(unsigned int hooknum, struct sk_buff *skb, 1424ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1276 const struct net_device *in, const struct net_device *out,
1277 int (*okfn)(struct sk_buff *))
1278{ 1425{
1279 struct ip_vs_iphdr iph; 1426 struct ip_vs_iphdr iph;
1280 struct ip_vs_protocol *pp; 1427 struct ip_vs_protocol *pp;
1281 struct ip_vs_conn *cp; 1428 struct ip_vs_conn *cp;
1282 int ret, restart, af, pkts; 1429 int ret, restart, pkts;
1283 1430
1284 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1431 /* Already marked as IPVS request or reply? */
1285 1432 if (skb->ipvs_property)
1286 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1433 return NF_ACCEPT;
1287 1434
1288 /* 1435 /*
1289 * Big tappo: only PACKET_HOST, including loopback for local client 1436 * Big tappo:
1290 * Don't handle local packets on IPv6 for now 1437 * - remote client: only PACKET_HOST
1438 * - route: used for struct net when skb->dev is unset
1291 */ 1439 */
1292 if (unlikely(skb->pkt_type != PACKET_HOST)) { 1440 if (unlikely((skb->pkt_type != PACKET_HOST &&
1293 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", 1441 hooknum != NF_INET_LOCAL_OUT) ||
1294 skb->pkt_type, 1442 !skb_dst(skb))) {
1295 iph.protocol, 1443 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1296 IP_VS_DBG_ADDR(af, &iph.daddr)); 1444 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
1445 " ignored in hook %u\n",
1446 skb->pkt_type, iph.protocol,
1447 IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
1297 return NF_ACCEPT; 1448 return NF_ACCEPT;
1298 } 1449 }
1450 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1451
1452 /* Bad... Do not break raw sockets */
1453 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1454 af == AF_INET)) {
1455 struct sock *sk = skb->sk;
1456 struct inet_sock *inet = inet_sk(skb->sk);
1457
1458 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1459 return NF_ACCEPT;
1460 }
1299 1461
1300#ifdef CONFIG_IP_VS_IPV6 1462#ifdef CONFIG_IP_VS_IPV6
1301 if (af == AF_INET6) { 1463 if (af == AF_INET6) {
1302 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1464 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1303 int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); 1465 int related;
1466 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
1304 1467
1305 if (related) 1468 if (related)
1306 return verdict; 1469 return verdict;
@@ -1309,7 +1472,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1309 } else 1472 } else
1310#endif 1473#endif
1311 if (unlikely(iph.protocol == IPPROTO_ICMP)) { 1474 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
1312 int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); 1475 int related;
1476 int verdict = ip_vs_in_icmp(skb, &related, hooknum);
1313 1477
1314 if (related) 1478 if (related)
1315 return verdict; 1479 return verdict;
@@ -1329,23 +1493,18 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1329 if (unlikely(!cp)) { 1493 if (unlikely(!cp)) {
1330 int v; 1494 int v;
1331 1495
1332 /* For local client packets, it could be a response */
1333 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
1334 if (cp)
1335 return handle_response(af, skb, pp, cp, iph.len);
1336
1337 if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1496 if (!pp->conn_schedule(af, skb, pp, &v, &cp))
1338 return v; 1497 return v;
1339 } 1498 }
1340 1499
1341 if (unlikely(!cp)) { 1500 if (unlikely(!cp)) {
1342 /* sorry, all this trouble for a no-hit :) */ 1501 /* sorry, all this trouble for a no-hit :) */
1343 IP_VS_DBG_PKT(12, pp, skb, 0, 1502 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1344 "packet continues traversal as normal"); 1503 "ip_vs_in: packet continues traversal as normal");
1345 return NF_ACCEPT; 1504 return NF_ACCEPT;
1346 } 1505 }
1347 1506
1348 IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); 1507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
1349 1508
1350 /* Check the server status */ 1509 /* Check the server status */
1351 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1510 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1381,8 +1540,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1381 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1540 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1382 cp->protocol == IPPROTO_SCTP) { 1541 cp->protocol == IPPROTO_SCTP) {
1383 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1542 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1384 (atomic_read(&cp->in_pkts) % 1543 (pkts % sysctl_ip_vs_sync_threshold[1]
1385 sysctl_ip_vs_sync_threshold[1]
1386 == sysctl_ip_vs_sync_threshold[0])) || 1544 == sysctl_ip_vs_sync_threshold[0])) ||
1387 (cp->old_state != cp->state && 1545 (cp->old_state != cp->state &&
1388 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1546 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
@@ -1393,7 +1551,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1393 } 1551 }
1394 } 1552 }
1395 1553
1396 if (af == AF_INET && 1554 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1555 else if (af == AF_INET &&
1397 (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1556 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1398 (((cp->protocol != IPPROTO_TCP || 1557 (((cp->protocol != IPPROTO_TCP ||
1399 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1558 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
@@ -1412,6 +1571,72 @@ out:
1412 return ret; 1571 return ret;
1413} 1572}
1414 1573
1574/*
1575 * AF_INET handler in NF_INET_LOCAL_IN chain
1576 * Schedule and forward packets from remote clients
1577 */
1578static unsigned int
1579ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
1580 const struct net_device *in,
1581 const struct net_device *out,
1582 int (*okfn)(struct sk_buff *))
1583{
1584 return ip_vs_in(hooknum, skb, AF_INET);
1585}
1586
1587/*
1588 * AF_INET handler in NF_INET_LOCAL_OUT chain
1589 * Schedule and forward packets from local clients
1590 */
1591static unsigned int
1592ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1593 const struct net_device *in, const struct net_device *out,
1594 int (*okfn)(struct sk_buff *))
1595{
1596 unsigned int verdict;
1597
1598 /* Disable BH in LOCAL_OUT until all places are fixed */
1599 local_bh_disable();
1600 verdict = ip_vs_in(hooknum, skb, AF_INET);
1601 local_bh_enable();
1602 return verdict;
1603}
1604
1605#ifdef CONFIG_IP_VS_IPV6
1606
1607/*
1608 * AF_INET6 handler in NF_INET_LOCAL_IN chain
1609 * Schedule and forward packets from remote clients
1610 */
1611static unsigned int
1612ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
1613 const struct net_device *in,
1614 const struct net_device *out,
1615 int (*okfn)(struct sk_buff *))
1616{
1617 return ip_vs_in(hooknum, skb, AF_INET6);
1618}
1619
1620/*
1621 * AF_INET6 handler in NF_INET_LOCAL_OUT chain
1622 * Schedule and forward packets from local clients
1623 */
1624static unsigned int
1625ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
1626 const struct net_device *in, const struct net_device *out,
1627 int (*okfn)(struct sk_buff *))
1628{
1629 unsigned int verdict;
1630
1631 /* Disable BH in LOCAL_OUT until all places are fixed */
1632 local_bh_disable();
1633 verdict = ip_vs_in(hooknum, skb, AF_INET6);
1634 local_bh_enable();
1635 return verdict;
1636}
1637
1638#endif
1639
1415 1640
1416/* 1641/*
1417 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP 1642 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
@@ -1452,23 +1677,39 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1452 1677
1453 1678
1454static struct nf_hook_ops ip_vs_ops[] __read_mostly = { 1679static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1680 /* After packet filtering, change source only for VS/NAT */
1681 {
1682 .hook = ip_vs_reply4,
1683 .owner = THIS_MODULE,
1684 .pf = PF_INET,
1685 .hooknum = NF_INET_LOCAL_IN,
1686 .priority = 99,
1687 },
1455 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1688 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1456 * or VS/NAT(change destination), so that filtering rules can be 1689 * or VS/NAT(change destination), so that filtering rules can be
1457 * applied to IPVS. */ 1690 * applied to IPVS. */
1458 { 1691 {
1459 .hook = ip_vs_in, 1692 .hook = ip_vs_remote_request4,
1460 .owner = THIS_MODULE, 1693 .owner = THIS_MODULE,
1461 .pf = PF_INET, 1694 .pf = PF_INET,
1462 .hooknum = NF_INET_LOCAL_IN, 1695 .hooknum = NF_INET_LOCAL_IN,
1463 .priority = 100, 1696 .priority = 101,
1464 }, 1697 },
1465 /* After packet filtering, change source only for VS/NAT */ 1698 /* Before ip_vs_in, change source only for VS/NAT */
1466 { 1699 {
1467 .hook = ip_vs_out, 1700 .hook = ip_vs_local_reply4,
1468 .owner = THIS_MODULE, 1701 .owner = THIS_MODULE,
1469 .pf = PF_INET, 1702 .pf = PF_INET,
1470 .hooknum = NF_INET_FORWARD, 1703 .hooknum = NF_INET_LOCAL_OUT,
1471 .priority = 100, 1704 .priority = -99,
1705 },
1706 /* After mangle, schedule and forward local requests */
1707 {
1708 .hook = ip_vs_local_request4,
1709 .owner = THIS_MODULE,
1710 .pf = PF_INET,
1711 .hooknum = NF_INET_LOCAL_OUT,
1712 .priority = -98,
1472 }, 1713 },
1473 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1714 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1474 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1715 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1476,27 +1717,51 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1476 .hook = ip_vs_forward_icmp, 1717 .hook = ip_vs_forward_icmp,
1477 .owner = THIS_MODULE, 1718 .owner = THIS_MODULE,
1478 .pf = PF_INET, 1719 .pf = PF_INET,
1479 .hooknum = NF_INET_FORWARD, 1720 .hooknum = NF_INET_FORWARD,
1480 .priority = 99, 1721 .priority = 99,
1722 },
1723 /* After packet filtering, change source only for VS/NAT */
1724 {
1725 .hook = ip_vs_reply4,
1726 .owner = THIS_MODULE,
1727 .pf = PF_INET,
1728 .hooknum = NF_INET_FORWARD,
1729 .priority = 100,
1481 }, 1730 },
1482#ifdef CONFIG_IP_VS_IPV6 1731#ifdef CONFIG_IP_VS_IPV6
1732 /* After packet filtering, change source only for VS/NAT */
1733 {
1734 .hook = ip_vs_reply6,
1735 .owner = THIS_MODULE,
1736 .pf = PF_INET6,
1737 .hooknum = NF_INET_LOCAL_IN,
1738 .priority = 99,
1739 },
1483 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1740 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1484 * or VS/NAT(change destination), so that filtering rules can be 1741 * or VS/NAT(change destination), so that filtering rules can be
1485 * applied to IPVS. */ 1742 * applied to IPVS. */
1486 { 1743 {
1487 .hook = ip_vs_in, 1744 .hook = ip_vs_remote_request6,
1488 .owner = THIS_MODULE, 1745 .owner = THIS_MODULE,
1489 .pf = PF_INET6, 1746 .pf = PF_INET6,
1490 .hooknum = NF_INET_LOCAL_IN, 1747 .hooknum = NF_INET_LOCAL_IN,
1491 .priority = 100, 1748 .priority = 101,
1492 }, 1749 },
1493 /* After packet filtering, change source only for VS/NAT */ 1750 /* Before ip_vs_in, change source only for VS/NAT */
1751 {
1752 .hook = ip_vs_local_reply6,
1753 .owner = THIS_MODULE,
1754 .pf = PF_INET6,
1755 .hooknum = NF_INET_LOCAL_OUT,
1756 .priority = -99,
1757 },
1758 /* After mangle, schedule and forward local requests */
1494 { 1759 {
1495 .hook = ip_vs_out, 1760 .hook = ip_vs_local_request6,
1496 .owner = THIS_MODULE, 1761 .owner = THIS_MODULE,
1497 .pf = PF_INET6, 1762 .pf = PF_INET6,
1498 .hooknum = NF_INET_FORWARD, 1763 .hooknum = NF_INET_LOCAL_OUT,
1499 .priority = 100, 1764 .priority = -98,
1500 }, 1765 },
1501 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1766 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1502 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1767 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1504,8 +1769,16 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1504 .hook = ip_vs_forward_icmp_v6, 1769 .hook = ip_vs_forward_icmp_v6,
1505 .owner = THIS_MODULE, 1770 .owner = THIS_MODULE,
1506 .pf = PF_INET6, 1771 .pf = PF_INET6,
1507 .hooknum = NF_INET_FORWARD, 1772 .hooknum = NF_INET_FORWARD,
1508 .priority = 99, 1773 .priority = 99,
1774 },
1775 /* After packet filtering, change source only for VS/NAT */
1776 {
1777 .hook = ip_vs_reply6,
1778 .owner = THIS_MODULE,
1779 .pf = PF_INET6,
1780 .hooknum = NF_INET_FORWARD,
1781 .priority = 100,
1509 }, 1782 },
1510#endif 1783#endif
1511}; 1784};
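The single in/out hook pair per family has become six hooks per family. The priorities encode the intended order: on LOCAL_IN, replies are source-NATed at 99 before remote requests are scheduled at 101; on LOCAL_OUT, the reply handler at -99 runs just before the request handler at -98, both after mangle (-150); FORWARD keeps ICMP handling at 99 ahead of reply NAT at 100. Registration itself is unchanged; a sketch of the bulk registration as presumably done at module init, assuming the standard nf_register_hooks() API of this kernel generation:

    /* Sketch: register the whole ip_vs_ops table at once. */
    int ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
    if (ret < 0)
            pr_err("can't register hooks.\n");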
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0f0c079c422a..5f5daa30b0af 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -61,7 +61,7 @@ static DEFINE_RWLOCK(__ip_vs_svc_lock);
61static DEFINE_RWLOCK(__ip_vs_rs_lock); 61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62 62
63/* lock for state and timeout tables */ 63/* lock for state and timeout tables */
64static DEFINE_RWLOCK(__ip_vs_securetcp_lock); 64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65 65
66/* lock for drop entry handling */ 66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); 67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
@@ -88,6 +88,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0; 88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; 89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0; 90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
94int sysctl_ip_vs_snat_reroute = 1;
91 95
92 96
93#ifdef CONFIG_IP_VS_DEBUG 97#ifdef CONFIG_IP_VS_DEBUG
@@ -204,7 +208,7 @@ static void update_defense_level(void)
204 spin_unlock(&__ip_vs_droppacket_lock); 208 spin_unlock(&__ip_vs_droppacket_lock);
205 209
206 /* secure_tcp */ 210 /* secure_tcp */
207 write_lock(&__ip_vs_securetcp_lock); 211 spin_lock(&ip_vs_securetcp_lock);
208 switch (sysctl_ip_vs_secure_tcp) { 212 switch (sysctl_ip_vs_secure_tcp) {
209 case 0: 213 case 0:
210 if (old_secure_tcp >= 2) 214 if (old_secure_tcp >= 2)
@@ -238,7 +242,7 @@ static void update_defense_level(void)
238 old_secure_tcp = sysctl_ip_vs_secure_tcp; 242 old_secure_tcp = sysctl_ip_vs_secure_tcp;
239 if (to_change >= 0) 243 if (to_change >= 0)
240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 244 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
241 write_unlock(&__ip_vs_securetcp_lock); 245 spin_unlock(&ip_vs_securetcp_lock);
242 246
243 local_bh_enable(); 247 local_bh_enable();
244} 248}
@@ -401,7 +405,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
401 * Get service by {proto,addr,port} in the service table. 405 * Get service by {proto,addr,port} in the service table.
402 */ 406 */
403static inline struct ip_vs_service * 407static inline struct ip_vs_service *
404__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, 408__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
405 __be16 vport) 409 __be16 vport)
406{ 410{
407 unsigned hash; 411 unsigned hash;
@@ -416,7 +420,6 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
416 && (svc->port == vport) 420 && (svc->port == vport)
417 && (svc->protocol == protocol)) { 421 && (svc->protocol == protocol)) {
418 /* HIT */ 422 /* HIT */
419 atomic_inc(&svc->usecnt);
420 return svc; 423 return svc;
421 } 424 }
422 } 425 }
@@ -429,7 +432,7 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
429 * Get service by {fwmark} in the service table. 432 * Get service by {fwmark} in the service table.
430 */ 433 */
431static inline struct ip_vs_service * 434static inline struct ip_vs_service *
432__ip_vs_svc_fwm_get(int af, __u32 fwmark) 435__ip_vs_svc_fwm_find(int af, __u32 fwmark)
433{ 436{
434 unsigned hash; 437 unsigned hash;
435 struct ip_vs_service *svc; 438 struct ip_vs_service *svc;
@@ -440,7 +443,6 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark)
440 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 443 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
441 if (svc->fwmark == fwmark && svc->af == af) { 444 if (svc->fwmark == fwmark && svc->af == af) {
442 /* HIT */ 445 /* HIT */
443 atomic_inc(&svc->usecnt);
444 return svc; 446 return svc;
445 } 447 }
446 } 448 }
@@ -459,14 +461,14 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
459 /* 461 /*
460 * Check the table hashed by fwmark first 462 * Check the table hashed by fwmark first
461 */ 463 */
462 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) 464 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
463 goto out; 465 goto out;
464 466
465 /* 467 /*
466 * Check the table hashed by <protocol,addr,port> 468 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries 469 * for "full" addressed entries
468 */ 470 */
469 svc = __ip_vs_service_get(af, protocol, vaddr, vport); 471 svc = __ip_vs_service_find(af, protocol, vaddr, vport);
470 472
471 if (svc == NULL 473 if (svc == NULL
472 && protocol == IPPROTO_TCP 474 && protocol == IPPROTO_TCP
@@ -476,7 +478,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
476 * Check if ftp service entry exists, the packet 478 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections. 479 * might belong to FTP data connections.
478 */ 480 */
479 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); 481 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
480 } 482 }
481 483
482 if (svc == NULL 484 if (svc == NULL
@@ -484,10 +486,12 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
484 /* 486 /*
485 * Check if the catch-all port (port zero) exists 487 * Check if the catch-all port (port zero) exists
486 */ 488 */
487 svc = __ip_vs_service_get(af, protocol, vaddr, 0); 489 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
488 } 490 }
489 491
490 out: 492 out:
493 if (svc)
494 atomic_inc(&svc->usecnt);
491 read_unlock(&__ip_vs_svc_lock); 495 read_unlock(&__ip_vs_svc_lock);
492 496
493 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 497 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
@@ -506,14 +510,19 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506 dest->svc = svc; 510 dest->svc = svc;
507} 511}
508 512
509static inline void 513static void
510__ip_vs_unbind_svc(struct ip_vs_dest *dest) 514__ip_vs_unbind_svc(struct ip_vs_dest *dest)
511{ 515{
512 struct ip_vs_service *svc = dest->svc; 516 struct ip_vs_service *svc = dest->svc;
513 517
514 dest->svc = NULL; 518 dest->svc = NULL;
515 if (atomic_dec_and_test(&svc->refcnt)) 519 if (atomic_dec_and_test(&svc->refcnt)) {
520 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
521 svc->fwmark,
522 IP_VS_DBG_ADDR(svc->af, &svc->addr),
523 ntohs(svc->port), atomic_read(&svc->usecnt));
516 kfree(svc); 524 kfree(svc);
525 }
517} 526}
518 527
519 528
@@ -758,31 +767,18 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
758 * Update a destination in the given service 767 * Update a destination in the given service
759 */ 768 */
760static void 769static void
761__ip_vs_update_dest(struct ip_vs_service *svc, 770__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
762 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) 771 struct ip_vs_dest_user_kern *udest, int add)
763{ 772{
764 int conn_flags; 773 int conn_flags;
765 774
766 /* set the weight and the flags */ 775 /* set the weight and the flags */
767 atomic_set(&dest->weight, udest->weight); 776 atomic_set(&dest->weight, udest->weight);
768 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; 777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
769 778 conn_flags |= IP_VS_CONN_F_INACTIVE;
770 /* check if local node and update the flags */
771#ifdef CONFIG_IP_VS_IPV6
772 if (svc->af == AF_INET6) {
773 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
774 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
775 | IP_VS_CONN_F_LOCALNODE;
776 }
777 } else
778#endif
779 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
780 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
781 | IP_VS_CONN_F_LOCALNODE;
782 }
783 779
784 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
785 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { 781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
786 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
787 } else { 783 } else {
788 /* 784 /*
@@ -813,6 +809,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
813 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
814 dest->u_threshold = udest->u_threshold; 810 dest->u_threshold = udest->u_threshold;
815 dest->l_threshold = udest->l_threshold; 811 dest->l_threshold = udest->l_threshold;
812
813 spin_lock(&dest->dst_lock);
814 ip_vs_dst_reset(dest);
815 spin_unlock(&dest->dst_lock);
816
817 if (add)
818 ip_vs_new_estimator(&dest->stats);
819
820 write_lock_bh(&__ip_vs_svc_lock);
821
822 /* Wait until all other svc users go away */
823 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
824
825 if (add) {
826 list_add(&dest->n_list, &svc->destinations);
827 svc->num_dests++;
828 }
829
830 /* call the update_service, because server weight may be changed */
831 if (svc->scheduler->update_service)
832 svc->scheduler->update_service(svc);
833
834 write_unlock_bh(&__ip_vs_svc_lock);
816} 835}
817 836
818 837
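__ip_vs_update_dest() now takes the svc write lock itself and waits for usecnt to drain to 0 (rather than 1, since services no longer hold a self-reference after the usecnt rework in this patch). IP_VS_WAIT_WHILE lives in ip_vs.h, outside this diff; to the best of my knowledge it is a simple busy-wait of this form (assumption):

    /* Assumed definition from ip_vs.h: spin until readers drain. */
    #define IP_VS_WAIT_WHILE(expr)  while (expr) { cpu_relax(); }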
@@ -843,7 +862,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843 return -EINVAL; 862 return -EINVAL;
844 } 863 }
845 864
846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 865 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
847 if (dest == NULL) { 866 if (dest == NULL) {
848 pr_err("%s(): no memory.\n", __func__); 867 pr_err("%s(): no memory.\n", __func__);
849 return -ENOMEM; 868 return -ENOMEM;
@@ -860,13 +879,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
860 atomic_set(&dest->activeconns, 0); 879 atomic_set(&dest->activeconns, 0);
861 atomic_set(&dest->inactconns, 0); 880 atomic_set(&dest->inactconns, 0);
862 atomic_set(&dest->persistconns, 0); 881 atomic_set(&dest->persistconns, 0);
863 atomic_set(&dest->refcnt, 0); 882 atomic_set(&dest->refcnt, 1);
864 883
865 INIT_LIST_HEAD(&dest->d_list); 884 INIT_LIST_HEAD(&dest->d_list);
866 spin_lock_init(&dest->dst_lock); 885 spin_lock_init(&dest->dst_lock);
867 spin_lock_init(&dest->stats.lock); 886 spin_lock_init(&dest->stats.lock);
868 __ip_vs_update_dest(svc, dest, udest); 887 __ip_vs_update_dest(svc, dest, udest, 1);
869 ip_vs_new_estimator(&dest->stats);
870 888
871 *dest_p = dest; 889 *dest_p = dest;
872 890
@@ -926,65 +944,22 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
926 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 944 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
927 ntohs(dest->vport)); 945 ntohs(dest->vport));
928 946
929 __ip_vs_update_dest(svc, dest, udest);
930
931 /* 947 /*
932 * Get the destination from the trash 948 * Get the destination from the trash
933 */ 949 */
934 list_del(&dest->n_list); 950 list_del(&dest->n_list);
935 951
936 ip_vs_new_estimator(&dest->stats); 952 __ip_vs_update_dest(svc, dest, udest, 1);
937 953 ret = 0;
938 write_lock_bh(&__ip_vs_svc_lock); 954 } else {
939
940 /* 955 /*
941 * Wait until all other svc users go away. 956 * Allocate and initialize the dest structure
942 */ 957 */
943 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 958 ret = ip_vs_new_dest(svc, udest, &dest);
944
945 list_add(&dest->n_list, &svc->destinations);
946 svc->num_dests++;
947
948 /* call the update_service function of its scheduler */
949 if (svc->scheduler->update_service)
950 svc->scheduler->update_service(svc);
951
952 write_unlock_bh(&__ip_vs_svc_lock);
953 return 0;
954 }
955
956 /*
957 * Allocate and initialize the dest structure
958 */
959 ret = ip_vs_new_dest(svc, udest, &dest);
960 if (ret) {
961 return ret;
962 } 959 }
963
964 /*
965 * Add the dest entry into the list
966 */
967 atomic_inc(&dest->refcnt);
968
969 write_lock_bh(&__ip_vs_svc_lock);
970
971 /*
972 * Wait until all other svc users go away.
973 */
974 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
975
976 list_add(&dest->n_list, &svc->destinations);
977 svc->num_dests++;
978
979 /* call the update_service function of its scheduler */
980 if (svc->scheduler->update_service)
981 svc->scheduler->update_service(svc);
982
983 write_unlock_bh(&__ip_vs_svc_lock);
984
985 LeaveFunction(2); 960 LeaveFunction(2);
986 961
987 return 0; 962 return ret;
988} 963}
989 964
990 965
@@ -1023,19 +998,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1023 return -ENOENT; 998 return -ENOENT;
1024 } 999 }
1025 1000
1026 __ip_vs_update_dest(svc, dest, udest); 1001 __ip_vs_update_dest(svc, dest, udest, 0);
1027
1028 write_lock_bh(&__ip_vs_svc_lock);
1029
1030 /* Wait until all other svc users go away */
1031 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1032
1033 /* call the update_service, because server weight may be changed */
1034 if (svc->scheduler->update_service)
1035 svc->scheduler->update_service(svc);
1036
1037 write_unlock_bh(&__ip_vs_svc_lock);
1038
1039 LeaveFunction(2); 1002 LeaveFunction(2);
1040 1003
1041 return 0; 1004 return 0;
@@ -1062,6 +1025,10 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1062 * the destination into the trash. 1025 * the destination into the trash.
1063 */ 1026 */
1064 if (atomic_dec_and_test(&dest->refcnt)) { 1027 if (atomic_dec_and_test(&dest->refcnt)) {
1028 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1029 dest->vfwmark,
1030 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1031 ntohs(dest->port));
1065 ip_vs_dst_reset(dest); 1032 ip_vs_dst_reset(dest);
1066 /* simply decrease svc->refcnt here, let the caller check 1033 /* simply decrease svc->refcnt here, let the caller check
1067 and release the service if nobody refers to it. 1034 and release the service if nobody refers to it.
@@ -1128,7 +1095,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1128 /* 1095 /*
1129 * Wait until all other svc users go away. 1096 * Wait until all other svc users go away.
1130 */ 1097 */
1131 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1132 1099
1133 /* 1100 /*
1134 * Unlink dest from the service 1101 * Unlink dest from the service
@@ -1157,6 +1124,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1157{ 1124{
1158 int ret = 0; 1125 int ret = 0;
1159 struct ip_vs_scheduler *sched = NULL; 1126 struct ip_vs_scheduler *sched = NULL;
1127 struct ip_vs_pe *pe = NULL;
1160 struct ip_vs_service *svc = NULL; 1128 struct ip_vs_service *svc = NULL;
1161 1129
1162 /* increase the module use count */ 1130 /* increase the module use count */
@@ -1167,7 +1135,17 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1167 if (sched == NULL) { 1135 if (sched == NULL) {
1168 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1136 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1169 ret = -ENOENT; 1137 ret = -ENOENT;
1170 goto out_mod_dec; 1138 goto out_err;
1139 }
1140
1141 if (u->pe_name && *u->pe_name) {
1142 pe = ip_vs_pe_get(u->pe_name);
1143 if (pe == NULL) {
1144 pr_info("persistence engine module ip_vs_pe_%s "
1145 "not found\n", u->pe_name);
1146 ret = -ENOENT;
1147 goto out_err;
1148 }
1171 } 1149 }
1172 1150
1173#ifdef CONFIG_IP_VS_IPV6 1151#ifdef CONFIG_IP_VS_IPV6
@@ -1177,7 +1155,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1177 } 1155 }
1178#endif 1156#endif
1179 1157
1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); 1158 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1181 if (svc == NULL) { 1159 if (svc == NULL) {
1182 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1160 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1183 ret = -ENOMEM; 1161 ret = -ENOMEM;
@@ -1185,7 +1163,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1185 } 1163 }
1186 1164
1187 /* I'm the first user of the service */ 1165 /* I'm the first user of the service */
1188 atomic_set(&svc->usecnt, 1); 1166 atomic_set(&svc->usecnt, 0);
1189 atomic_set(&svc->refcnt, 0); 1167 atomic_set(&svc->refcnt, 0);
1190 1168
1191 svc->af = u->af; 1169 svc->af = u->af;
@@ -1207,6 +1185,10 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1207 goto out_err; 1185 goto out_err;
1208 sched = NULL; 1186 sched = NULL;
1209 1187
1188 /* Bind the ct retriever */
1189 ip_vs_bind_pe(svc, pe);
1190 pe = NULL;
1191
1210 /* Update the virtual service counters */ 1192 /* Update the virtual service counters */
1211 if (svc->port == FTPPORT) 1193 if (svc->port == FTPPORT)
1212 atomic_inc(&ip_vs_ftpsvc_counter); 1194 atomic_inc(&ip_vs_ftpsvc_counter);
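Services can now bind a persistence engine (pe), resolved by name at add/edit time just like schedulers. The pe interface itself is outside this diff; a hypothetical reduced shape, inferred only from how this file uses it (name lookup, refcounting and bind/unbind are exercised here; the callback set is a guess):

    /* Hypothetical sketch of struct ip_vs_pe; only the parts this
     * file exercises. The fill_param callback is an assumption. */
    struct ip_vs_pe {
            struct list_head n_list;        /* registered pe list */
            char             *name;
            atomic_t         refcnt;
            struct module    *module;
            /* fill conn template parameters from packet payload */
            int (*fill_param)(struct ip_vs_conn_param *p,
                              struct sk_buff *skb);
    };

    static inline void
    ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
    {
            svc->pe = pe;
    }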
@@ -1227,10 +1209,9 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1227 *svc_p = svc; 1209 *svc_p = svc;
1228 return 0; 1210 return 0;
1229 1211
1230 out_err: 1212 out_err:
1231 if (svc != NULL) { 1213 if (svc != NULL) {
1232 if (svc->scheduler) 1214 ip_vs_unbind_scheduler(svc);
1233 ip_vs_unbind_scheduler(svc);
1234 if (svc->inc) { 1215 if (svc->inc) {
1235 local_bh_disable(); 1216 local_bh_disable();
1236 ip_vs_app_inc_put(svc->inc); 1217 ip_vs_app_inc_put(svc->inc);
@@ -1239,8 +1220,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1239 kfree(svc); 1220 kfree(svc);
1240 } 1221 }
1241 ip_vs_scheduler_put(sched); 1222 ip_vs_scheduler_put(sched);
1223 ip_vs_pe_put(pe);
1242 1224
1243 out_mod_dec:
1244 /* decrease the module use count */ 1225 /* decrease the module use count */
1245 ip_vs_use_count_dec(); 1226 ip_vs_use_count_dec();
1246 1227
@@ -1255,6 +1236,7 @@ static int
1255ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1236ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1256{ 1237{
1257 struct ip_vs_scheduler *sched, *old_sched; 1238 struct ip_vs_scheduler *sched, *old_sched;
1239 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1258 int ret = 0; 1240 int ret = 0;
1259 1241
1260 /* 1242 /*
@@ -1267,6 +1249,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1267 } 1249 }
1268 old_sched = sched; 1250 old_sched = sched;
1269 1251
1252 if (u->pe_name && *u->pe_name) {
1253 pe = ip_vs_pe_get(u->pe_name);
1254 if (pe == NULL) {
1255 pr_info("persistence engine module ip_vs_pe_%s "
1256 "not found\n", u->pe_name);
1257 ret = -ENOENT;
1258 goto out;
1259 }
1260 old_pe = pe;
1261 }
1262
1270#ifdef CONFIG_IP_VS_IPV6 1263#ifdef CONFIG_IP_VS_IPV6
1271 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { 1264 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1272 ret = -EINVAL; 1265 ret = -EINVAL;
@@ -1279,7 +1272,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1279 /* 1272 /*
1280 * Wait until all other svc users go away. 1273 * Wait until all other svc users go away.
1281 */ 1274 */
1282 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1275 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1283 1276
1284 /* 1277 /*
1285 * Set the flags and timeout value 1278 * Set the flags and timeout value
@@ -1318,15 +1311,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1318 } 1311 }
1319 } 1312 }
1320 1313
1314 old_pe = svc->pe;
1315 if (pe != old_pe) {
1316 ip_vs_unbind_pe(svc);
1317 ip_vs_bind_pe(svc, pe);
1318 }
1319
1321 out_unlock: 1320 out_unlock:
1322 write_unlock_bh(&__ip_vs_svc_lock); 1321 write_unlock_bh(&__ip_vs_svc_lock);
1323#ifdef CONFIG_IP_VS_IPV6
1324 out: 1322 out:
1325#endif 1323 ip_vs_scheduler_put(old_sched);
1326 1324 ip_vs_pe_put(old_pe);
1327 if (old_sched)
1328 ip_vs_scheduler_put(old_sched);
1329
1330 return ret; 1325 return ret;
1331} 1326}
1332 1327
@@ -1340,6 +1335,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1340{ 1335{
1341 struct ip_vs_dest *dest, *nxt; 1336 struct ip_vs_dest *dest, *nxt;
1342 struct ip_vs_scheduler *old_sched; 1337 struct ip_vs_scheduler *old_sched;
1338 struct ip_vs_pe *old_pe;
1339
1340 pr_info("%s: enter\n", __func__);
1343 1341
1344 /* Count only IPv4 services for old get/setsockopt interface */ 1342 /* Count only IPv4 services for old get/setsockopt interface */
1345 if (svc->af == AF_INET) 1343 if (svc->af == AF_INET)
@@ -1350,8 +1348,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1350 /* Unbind scheduler */ 1348 /* Unbind scheduler */
1351 old_sched = svc->scheduler; 1349 old_sched = svc->scheduler;
1352 ip_vs_unbind_scheduler(svc); 1350 ip_vs_unbind_scheduler(svc);
1353 if (old_sched) 1351 ip_vs_scheduler_put(old_sched);
1354 ip_vs_scheduler_put(old_sched); 1352
1353 /* Unbind persistence engine */
1354 old_pe = svc->pe;
1355 ip_vs_unbind_pe(svc);
1356 ip_vs_pe_put(old_pe);
1355 1357
1356 /* Unbind app inc */ 1358 /* Unbind app inc */
1357 if (svc->inc) { 1359 if (svc->inc) {
@@ -1378,21 +1380,23 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1378 /* 1380 /*
1379 * Free the service if nobody refers to it 1381 * Free the service if nobody refers to it
1380 */ 1382 */
1381 if (atomic_read(&svc->refcnt) == 0) 1383 if (atomic_read(&svc->refcnt) == 0) {
1384 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1385 svc->fwmark,
1386 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1387 ntohs(svc->port), atomic_read(&svc->usecnt));
1382 kfree(svc); 1388 kfree(svc);
1389 }
1383 1390
1384 /* decrease the module use count */ 1391 /* decrease the module use count */
1385 ip_vs_use_count_dec(); 1392 ip_vs_use_count_dec();
1386} 1393}
1387 1394
1388/* 1395/*
1389 * Delete a service from the service list 1396 * Unlink a service from list and try to delete it if its refcnt reached 0
1390 */ 1397 */
1391static int ip_vs_del_service(struct ip_vs_service *svc) 1398static void ip_vs_unlink_service(struct ip_vs_service *svc)
1392{ 1399{
1393 if (svc == NULL)
1394 return -EEXIST;
1395
1396 /* 1400 /*
1397 * Unhash it from the service table 1401 * Unhash it from the service table
1398 */ 1402 */
@@ -1403,11 +1407,21 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1403 /* 1407 /*
1404 * Wait until all the svc users go away. 1408 * Wait until all the svc users go away.
1405 */ 1409 */
1406 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1410 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1407 1411
1408 __ip_vs_del_service(svc); 1412 __ip_vs_del_service(svc);
1409 1413
1410 write_unlock_bh(&__ip_vs_svc_lock); 1414 write_unlock_bh(&__ip_vs_svc_lock);
1415}
1416
1417/*
1418 * Delete a service from the service list
1419 */
1420static int ip_vs_del_service(struct ip_vs_service *svc)
1421{
1422 if (svc == NULL)
1423 return -EEXIST;
1424 ip_vs_unlink_service(svc);
1411 1425
1412 return 0; 1426 return 0;
1413} 1427}
@@ -1426,14 +1440,7 @@ static int ip_vs_flush(void)
1426 */ 1440 */
1427 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1441 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1428 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1442 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1429 write_lock_bh(&__ip_vs_svc_lock); 1443 ip_vs_unlink_service(svc);
1430 ip_vs_svc_unhash(svc);
1431 /*
1432 * Wait until all the svc users go away.
1433 */
1434 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1435 __ip_vs_del_service(svc);
1436 write_unlock_bh(&__ip_vs_svc_lock);
1437 } 1444 }
1438 } 1445 }
1439 1446
@@ -1443,14 +1450,7 @@ static int ip_vs_flush(void)
1443 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1450 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1444 list_for_each_entry_safe(svc, nxt, 1451 list_for_each_entry_safe(svc, nxt,
1445 &ip_vs_svc_fwm_table[idx], f_list) { 1452 &ip_vs_svc_fwm_table[idx], f_list) {
1446 write_lock_bh(&__ip_vs_svc_lock); 1453 ip_vs_unlink_service(svc);
1447 ip_vs_svc_unhash(svc);
1448 /*
1449 * Wait until all the svc users go away.
1450 */
1451 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1452 __ip_vs_del_service(svc);
1453 write_unlock_bh(&__ip_vs_svc_lock);
1454 } 1454 }
1455 } 1455 }
1456 1456
@@ -1579,6 +1579,15 @@ static struct ctl_table vs_vars[] = {
1579 .mode = 0644, 1579 .mode = 0644,
1580 .proc_handler = proc_do_defense_mode, 1580 .proc_handler = proc_do_defense_mode,
1581 }, 1581 },
1582#ifdef CONFIG_IP_VS_NFCT
1583 {
1584 .procname = "conntrack",
1585 .data = &sysctl_ip_vs_conntrack,
1586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = &proc_dointvec,
1589 },
1590#endif
1582 { 1591 {
1583 .procname = "secure_tcp", 1592 .procname = "secure_tcp",
1584 .data = &sysctl_ip_vs_secure_tcp, 1593 .data = &sysctl_ip_vs_secure_tcp,
@@ -1586,6 +1595,13 @@ static struct ctl_table vs_vars[] = {
1586 .mode = 0644, 1595 .mode = 0644,
1587 .proc_handler = proc_do_defense_mode, 1596 .proc_handler = proc_do_defense_mode,
1588 }, 1597 },
1598 {
1599 .procname = "snat_reroute",
1600 .data = &sysctl_ip_vs_snat_reroute,
1601 .maxlen = sizeof(int),
1602 .mode = 0644,
1603 .proc_handler = &proc_dointvec,
1604 },
1589#if 0 1605#if 0
1590 { 1606 {
1591 .procname = "timeout_established", 1607 .procname = "timeout_established",
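
The two entries added above appear as net.ipv4.vs.conntrack and
net.ipv4.vs.snat_reroute in the existing net_vs_ctl_path sysctl tree. A
small userspace probe of the new toggle, assuming the usual
/proc/sys/net/ipv4/vs/ location of that tree (and CONFIG_IP_VS_NFCT for the
conntrack knob):

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/vs/conntrack", "r");
        int val;

        if (!f) {
                perror("conntrack sysctl");
                return 1;
        }
        if (fscanf(f, "%d", &val) == 1)
                printf("net.ipv4.vs.conntrack = %d\n", val);
        fclose(f);
        return 0;
}
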
@@ -2041,6 +2057,8 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2041static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2057static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2042 struct ip_vs_service_user *usvc_compat) 2058 struct ip_vs_service_user *usvc_compat)
2043{ 2059{
2060 memset(usvc, 0, sizeof(*usvc));
2061
2044 usvc->af = AF_INET; 2062 usvc->af = AF_INET;
2045 usvc->protocol = usvc_compat->protocol; 2063 usvc->protocol = usvc_compat->protocol;
2046 usvc->addr.ip = usvc_compat->addr; 2064 usvc->addr.ip = usvc_compat->addr;
@@ -2058,6 +2076,8 @@ static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2058static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2076static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2059 struct ip_vs_dest_user *udest_compat) 2077 struct ip_vs_dest_user *udest_compat)
2060{ 2078{
2079 memset(udest, 0, sizeof(*udest));
2080
2061 udest->addr.ip = udest_compat->addr; 2081 udest->addr.ip = udest_compat->addr;
2062 udest->port = udest_compat->port; 2082 udest->port = udest_compat->port;
2063 udest->conn_flags = udest_compat->conn_flags; 2083 udest->conn_flags = udest_compat->conn_flags;
@@ -2147,10 +2167,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2147 2167
2148 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2168 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2149 if (usvc.fwmark == 0) 2169 if (usvc.fwmark == 0)
2150 svc = __ip_vs_service_get(usvc.af, usvc.protocol, 2170 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2151 &usvc.addr, usvc.port); 2171 &usvc.addr, usvc.port);
2152 else 2172 else
2153 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); 2173 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2154 2174
2155 if (cmd != IP_VS_SO_SET_ADD 2175 if (cmd != IP_VS_SO_SET_ADD
2156 && (svc == NULL || svc->protocol != usvc.protocol)) { 2176 && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2189,9 +2209,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2189 ret = -EINVAL; 2209 ret = -EINVAL;
2190 } 2210 }
2191 2211
2192 if (svc)
2193 ip_vs_service_put(svc);
2194
2195 out_unlock: 2212 out_unlock:
2196 mutex_unlock(&__ip_vs_mutex); 2213 mutex_unlock(&__ip_vs_mutex);
2197 out_dec: 2214 out_dec:
@@ -2284,10 +2301,10 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2284 int ret = 0; 2301 int ret = 0;
2285 2302
2286 if (get->fwmark) 2303 if (get->fwmark)
2287 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); 2304 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2288 else 2305 else
2289 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, 2306 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2290 get->port); 2307 get->port);
2291 2308
2292 if (svc) { 2309 if (svc) {
2293 int count = 0; 2310 int count = 0;
@@ -2315,7 +2332,6 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2315 } 2332 }
2316 count++; 2333 count++;
2317 } 2334 }
2318 ip_vs_service_put(svc);
2319 } else 2335 } else
2320 ret = -ESRCH; 2336 ret = -ESRCH;
2321 return ret; 2337 return ret;
@@ -2436,15 +2452,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2436 entry = (struct ip_vs_service_entry *)arg; 2452 entry = (struct ip_vs_service_entry *)arg;
2437 addr.ip = entry->addr; 2453 addr.ip = entry->addr;
2438 if (entry->fwmark) 2454 if (entry->fwmark)
2439 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); 2455 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2440 else 2456 else
2441 svc = __ip_vs_service_get(AF_INET, entry->protocol, 2457 svc = __ip_vs_service_find(AF_INET, entry->protocol,
2442 &addr, entry->port); 2458 &addr, entry->port);
2443 if (svc) { 2459 if (svc) {
2444 ip_vs_copy_service(entry, svc); 2460 ip_vs_copy_service(entry, svc);
2445 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2461 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2446 ret = -EFAULT; 2462 ret = -EFAULT;
2447 ip_vs_service_put(svc);
2448 } else 2463 } else
2449 ret = -ESRCH; 2464 ret = -ESRCH;
2450 } 2465 }
@@ -2559,6 +2574,8 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2559 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2574 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2560 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2575 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2561 .len = IP_VS_SCHEDNAME_MAXLEN }, 2576 .len = IP_VS_SCHEDNAME_MAXLEN },
2577 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2578 .len = IP_VS_PENAME_MAXLEN },
2562 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2579 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2563 .len = sizeof(struct ip_vs_flags) }, 2580 .len = sizeof(struct ip_vs_flags) },
2564 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2581 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
@@ -2635,6 +2652,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
2635 } 2652 }
2636 2653
2637 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); 2654 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2655 if (svc->pe)
2656 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2638 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); 2657 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2639 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); 2658 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2640 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); 2659 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
@@ -2711,10 +2730,12 @@ nla_put_failure:
2711} 2730}
2712 2731
2713static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, 2732static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2714 struct nlattr *nla, int full_entry) 2733 struct nlattr *nla, int full_entry,
2734 struct ip_vs_service **ret_svc)
2715{ 2735{
2716 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 2736 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2717 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 2737 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2738 struct ip_vs_service *svc;
2718 2739
2719 /* Parse mandatory identifying service fields first */ 2740 /* Parse mandatory identifying service fields first */
2720 if (nla == NULL || 2741 if (nla == NULL ||
@@ -2750,14 +2771,21 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2750 usvc->fwmark = 0; 2771 usvc->fwmark = 0;
2751 } 2772 }
2752 2773
2774 if (usvc->fwmark)
2775 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2776 else
2777 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2778 &usvc->addr, usvc->port);
2779 *ret_svc = svc;
2780
2753 /* If a full entry was requested, check for the additional fields */ 2781 /* If a full entry was requested, check for the additional fields */
2754 if (full_entry) { 2782 if (full_entry) {
2755 struct nlattr *nla_sched, *nla_flags, *nla_timeout, 2783 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2756 *nla_netmask; 2784 *nla_netmask;
2757 struct ip_vs_flags flags; 2785 struct ip_vs_flags flags;
2758 struct ip_vs_service *svc;
2759 2786
2760 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 2787 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2788 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2761 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 2789 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2762 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 2790 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2763 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 2791 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
@@ -2768,21 +2796,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2768 nla_memcpy(&flags, nla_flags, sizeof(flags)); 2796 nla_memcpy(&flags, nla_flags, sizeof(flags));
2769 2797
2770 /* prefill flags from service if it already exists */ 2798 /* prefill flags from service if it already exists */
2771 if (usvc->fwmark) 2799 if (svc)
2772 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2773 else
2774 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2775 &usvc->addr, usvc->port);
2776 if (svc) {
2777 usvc->flags = svc->flags; 2800 usvc->flags = svc->flags;
2778 ip_vs_service_put(svc);
2779 } else
2780 usvc->flags = 0;
2781 2801
2782 /* set new flags from userland */ 2802 /* set new flags from userland */
2783 usvc->flags = (usvc->flags & ~flags.mask) | 2803 usvc->flags = (usvc->flags & ~flags.mask) |
2784 (flags.flags & flags.mask); 2804 (flags.flags & flags.mask);
2785 usvc->sched_name = nla_data(nla_sched); 2805 usvc->sched_name = nla_data(nla_sched);
2806 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2786 usvc->timeout = nla_get_u32(nla_timeout); 2807 usvc->timeout = nla_get_u32(nla_timeout);
2787 usvc->netmask = nla_get_u32(nla_netmask); 2808 usvc->netmask = nla_get_u32(nla_netmask);
2788 } 2809 }
@@ -2793,17 +2814,11 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2793static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) 2814static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2794{ 2815{
2795 struct ip_vs_service_user_kern usvc; 2816 struct ip_vs_service_user_kern usvc;
2817 struct ip_vs_service *svc;
2796 int ret; 2818 int ret;
2797 2819
2798 ret = ip_vs_genl_parse_service(&usvc, nla, 0); 2820 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2799 if (ret) 2821 return ret ? ERR_PTR(ret) : svc;
2800 return ERR_PTR(ret);
2801
2802 if (usvc.fwmark)
2803 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2804 else
2805 return __ip_vs_service_get(usvc.af, usvc.protocol,
2806 &usvc.addr, usvc.port);
2807} 2822}
2808 2823
2809static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 2824static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
@@ -2894,7 +2909,6 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2894 2909
2895nla_put_failure: 2910nla_put_failure:
2896 cb->args[0] = idx; 2911 cb->args[0] = idx;
2897 ip_vs_service_put(svc);
2898 2912
2899out_err: 2913out_err:
2900 mutex_unlock(&__ip_vs_mutex); 2914 mutex_unlock(&__ip_vs_mutex);
@@ -3107,17 +3121,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3107 3121
3108 ret = ip_vs_genl_parse_service(&usvc, 3122 ret = ip_vs_genl_parse_service(&usvc,
3109 info->attrs[IPVS_CMD_ATTR_SERVICE], 3123 info->attrs[IPVS_CMD_ATTR_SERVICE],
3110 need_full_svc); 3124 need_full_svc, &svc);
3111 if (ret) 3125 if (ret)
3112 goto out; 3126 goto out;
3113 3127
3114 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3115 if (usvc.fwmark == 0)
3116 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3117 &usvc.addr, usvc.port);
3118 else
3119 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3120
3121 /* Unless we're adding a new service, the service must already exist */ 3128 /* Unless we're adding a new service, the service must already exist */
3122 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3129 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3123 ret = -ESRCH; 3130 ret = -ESRCH;
@@ -3151,6 +3158,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3151 break; 3158 break;
3152 case IPVS_CMD_DEL_SERVICE: 3159 case IPVS_CMD_DEL_SERVICE:
3153 ret = ip_vs_del_service(svc); 3160 ret = ip_vs_del_service(svc);
3161 /* do not use svc, it can be freed */
3154 break; 3162 break;
3155 case IPVS_CMD_NEW_DEST: 3163 case IPVS_CMD_NEW_DEST:
3156 ret = ip_vs_add_dest(svc, &udest); 3164 ret = ip_vs_add_dest(svc, &udest);
@@ -3169,8 +3177,6 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3169 } 3177 }
3170 3178
3171out: 3179out:
3172 if (svc)
3173 ip_vs_service_put(svc);
3174 mutex_unlock(&__ip_vs_mutex); 3180 mutex_unlock(&__ip_vs_mutex);
3175 3181
3176 return ret; 3182 return ret;
@@ -3216,7 +3222,6 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3216 goto out_err; 3222 goto out_err;
3217 } else if (svc) { 3223 } else if (svc) {
3218 ret = ip_vs_genl_fill_service(msg, svc); 3224 ret = ip_vs_genl_fill_service(msg, svc);
3219 ip_vs_service_put(svc);
3220 if (ret) 3225 if (ret)
3221 goto nla_put_failure; 3226 goto nla_put_failure;
3222 } else { 3227 } else {
@@ -3385,6 +3390,16 @@ int __init ip_vs_control_init(void)
3385 3390
3386 EnterFunction(2); 3391 EnterFunction(2);
3387 3392
3393 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3394 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3395 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3396 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3397 }
3398 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3399 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3400 }
3401 smp_wmb();
3402
3388 ret = nf_register_sockopt(&ip_vs_sockopts); 3403 ret = nf_register_sockopt(&ip_vs_sockopts);
3389 if (ret) { 3404 if (ret) {
3390 pr_err("cannot register sockopt.\n"); 3405 pr_err("cannot register sockopt.\n");
@@ -3403,15 +3418,6 @@ int __init ip_vs_control_init(void)
3403 3418
3404 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); 3419 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3405 3420
3406 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3407 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3408 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3409 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3410 }
3411 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3412 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3413 }
3414
3415 ip_vs_new_estimator(&ip_vs_stats); 3421 ip_vs_new_estimator(&ip_vs_stats);
3416 3422
3417 /* Hook the defense timer */ 3423 /* Hook the defense timer */
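
A pattern repeats through the ip_vs_ctl.c hunks above: the refcount-taking
__ip_vs_service_get()/__ip_vs_svc_fwm_get() lookups become _find() variants
that take no reference, and the paired ip_vs_service_put() calls are
deleted. This is safe because the lookups now run entirely under
__ip_vs_mutex (note the mutex_unlock() calls the hunks leave in place), so a
found service cannot be freed while it is in use. A minimal userspace sketch
of that locking discipline; every name below is illustrative, none of it is
kernel API:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

struct svc {
        struct svc *next;
        char name[16];
};

static struct svc *table;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* _find-style lookup: takes no reference, result is valid only
 * while the caller holds table_lock */
static struct svc *svc_find(const char *name)
{
        struct svc *s;

        for (s = table; s; s = s->next)
                if (!strcmp(s->name, name))
                        return s;
        return NULL;
}

int main(void)
{
        struct svc web = { .next = NULL, .name = "web" };
        struct svc *hit;

        table = &web;

        pthread_mutex_lock(&table_lock);
        hit = svc_find("web");          /* no get/put pair needed */
        if (hit)
                printf("found %s\n", hit->name);
        pthread_mutex_unlock(&table_lock);
        return 0;
}

The one rule is that the result of svc_find() is dereferenced strictly
between lock and unlock, which is also why the IPVS_CMD_DEL_SERVICE case
above gains the comment that svc must not be used once it may be freed.
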
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7e9af5b76d9e..75455000ad1c 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -20,17 +20,6 @@
20 * 20 *
21 * Author: Wouter Gadeyne 21 * Author: Wouter Gadeyne
22 * 22 *
23 *
24 * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
25 * http://www.ssi.bg/~ja/nfct/:
26 *
27 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
28 *
29 * Portions Copyright (C) 2001-2002
30 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
31 *
32 * Portions Copyright (C) 2003-2008
33 * Julian Anastasov
34 */ 23 */
35 24
36#define KMSG_COMPONENT "IPVS" 25#define KMSG_COMPONENT "IPVS"
@@ -58,16 +47,6 @@
58#define SERVER_STRING "227 Entering Passive Mode (" 47#define SERVER_STRING "227 Entering Passive Mode ("
59#define CLIENT_STRING "PORT " 48#define CLIENT_STRING "PORT "
60 49
61#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
62#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
63 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
64 (T)->dst.protonum
65
66#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
67#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
68 &((C)->vaddr.ip), ntohs((C)->vport), \
69 &((C)->daddr.ip), ntohs((C)->dport), \
70 (C)->protocol, (C)->state
71 50
72/* 51/*
73 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper 52 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -85,6 +64,8 @@ static int ip_vs_ftp_pasv;
85static int 64static int
86ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) 65ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
87{ 66{
67 /* We use connection tracking for the command connection */
68 cp->flags |= IP_VS_CONN_F_NFCT;
88 return 0; 69 return 0;
89} 70}
90 71
@@ -149,120 +130,6 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
149} 130}
150 131
151/* 132/*
152 * Called from init_conntrack() as expectfn handler.
153 */
154static void
155ip_vs_expect_callback(struct nf_conn *ct,
156 struct nf_conntrack_expect *exp)
157{
158 struct nf_conntrack_tuple *orig, new_reply;
159 struct ip_vs_conn *cp;
160
161 if (exp->tuple.src.l3num != PF_INET)
162 return;
163
164 /*
165 * We assume that no NF locks are held before this callback.
166 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
167 * expectations even if they use wildcard values, now we provide the
168 * actual values from the newly created original conntrack direction.
169 * The conntrack is confirmed when packet reaches IPVS hooks.
170 */
171
172 /* RS->CLIENT */
173 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
174 cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
175 &orig->src.u3, orig->src.u.tcp.port,
176 &orig->dst.u3, orig->dst.u.tcp.port);
177 if (cp) {
178 /* Change reply CLIENT->RS to CLIENT->VS */
179 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
180 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
181 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
182 __func__, ct, ct->status,
183 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
184 ARG_CONN(cp));
185 new_reply.dst.u3 = cp->vaddr;
186 new_reply.dst.u.tcp.port = cp->vport;
187 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
188 ", inout cp=" FMT_CONN "\n",
189 __func__, ct,
190 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
191 ARG_CONN(cp));
192 goto alter;
193 }
194
195 /* CLIENT->VS */
196 cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
197 &orig->src.u3, orig->src.u.tcp.port,
198 &orig->dst.u3, orig->dst.u.tcp.port);
199 if (cp) {
200 /* Change reply VS->CLIENT to RS->CLIENT */
201 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
202 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
203 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
204 __func__, ct, ct->status,
205 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
206 ARG_CONN(cp));
207 new_reply.src.u3 = cp->daddr;
208 new_reply.src.u.tcp.port = cp->dport;
209 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
210 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
211 __func__, ct,
212 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
213 ARG_CONN(cp));
214 goto alter;
215 }
216
217 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
218 " - unknown expect\n",
219 __func__, ct, ct->status, ARG_TUPLE(orig));
220 return;
221
222alter:
223 /* Never alter conntrack for non-NAT conns */
224 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
225 nf_conntrack_alter_reply(ct, &new_reply);
226 ip_vs_conn_put(cp);
227 return;
228}
229
230/*
231 * Create NF conntrack expectation with wildcard (optional) source port.
232 * Then the default callback function will alter the reply and will confirm
233 * the conntrack entry when the first packet comes.
234 */
235static void
236ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
237 struct ip_vs_conn *cp, u_int8_t proto,
238 const __be16 *port, int from_rs)
239{
240 struct nf_conntrack_expect *exp;
241
242 BUG_ON(!ct || ct == &nf_conntrack_untracked);
243
244 exp = nf_ct_expect_alloc(ct);
245 if (!exp)
246 return;
247
248 if (from_rs)
249 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
250 nf_ct_l3num(ct), &cp->daddr, &cp->caddr,
251 proto, port, &cp->cport);
252 else
253 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
254 nf_ct_l3num(ct), &cp->caddr, &cp->vaddr,
255 proto, port, &cp->vport);
256
257 exp->expectfn = ip_vs_expect_callback;
258
259 IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
260 __func__, ct, ARG_TUPLE(&exp->tuple));
261 nf_ct_expect_related(exp);
262 nf_ct_expect_put(exp);
263}
264
265/*
266 * Look at outgoing ftp packets to catch the response to a PASV command 133 * Look at outgoing ftp packets to catch the response to a PASV command
267 * from the server (inside-to-outside). 134 * from the server (inside-to-outside).
268 * When we see one, we build a connection entry with the client address, 135 * When we see one, we build a connection entry with the client address,
@@ -328,14 +195,19 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
328 /* 195 /*
329 * Now update or create a connection entry for it 196
330 */ 197 */
331 n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, 198 {
332 &cp->caddr, 0); 199 struct ip_vs_conn_param p;
200 ip_vs_conn_fill_param(AF_INET, iph->protocol,
201 &from, port, &cp->caddr, 0, &p);
202 n_cp = ip_vs_conn_out_get(&p);
203 }
333 if (!n_cp) { 204 if (!n_cp) {
334 n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, 205 struct ip_vs_conn_param p;
335 &cp->caddr, 0, 206 ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
336 &cp->vaddr, port, 207 0, &cp->vaddr, port, &p);
337 &from, port, 208 n_cp = ip_vs_conn_new(&p, &from, port,
338 IP_VS_CONN_F_NO_CPORT, 209 IP_VS_CONN_F_NO_CPORT |
210 IP_VS_CONN_F_NFCT,
339 cp->dest); 211 cp->dest);
340 if (!n_cp) 212 if (!n_cp)
341 return 0; 213 return 0;
@@ -370,9 +242,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
370 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 242 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
371 start-data, end-start, 243 start-data, end-start,
372 buf, buf_len); 244 buf, buf_len);
373 if (ret) 245 if (ret) {
374 ip_vs_expect_related(skb, ct, n_cp, 246 ip_vs_nfct_expect_related(skb, ct, n_cp,
375 IPPROTO_TCP, NULL, 0); 247 IPPROTO_TCP, 0, 0);
248 if (skb->ip_summed == CHECKSUM_COMPLETE)
249 skb->ip_summed = CHECKSUM_UNNECESSARY;
250 /* csum is updated */
251 ret = 1;
252 }
376 } 253 }
377 254
378 /* 255 /*
@@ -479,21 +356,22 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
479 ip_vs_proto_name(iph->protocol), 356 ip_vs_proto_name(iph->protocol),
480 &to.ip, ntohs(port), &cp->vaddr.ip, 0); 357 &to.ip, ntohs(port), &cp->vaddr.ip, 0);
481 358
482 n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, 359 {
483 &to, port, 360 struct ip_vs_conn_param p;
484 &cp->vaddr, htons(ntohs(cp->vport)-1)); 361 ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
485 if (!n_cp) {
486 n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
487 &to, port,
488 &cp->vaddr, htons(ntohs(cp->vport)-1), 362 &cp->vaddr, htons(ntohs(cp->vport)-1),
489 &cp->daddr, htons(ntohs(cp->dport)-1), 363 &p);
490 0, 364 n_cp = ip_vs_conn_in_get(&p);
491 cp->dest); 365 if (!n_cp) {
492 if (!n_cp) 366 n_cp = ip_vs_conn_new(&p, &cp->daddr,
493 return 0; 367 htons(ntohs(cp->dport)-1),
368 IP_VS_CONN_F_NFCT, cp->dest);
369 if (!n_cp)
370 return 0;
494 371
495 /* add its controller */ 372 /* add its controller */
496 ip_vs_control_add(n_cp, cp); 373 ip_vs_control_add(n_cp, cp);
374 }
497 } 375 }
498 376
499 /* 377 /*
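
The ip_vs_ftp.c hunks above replace five- and six-argument connection
lookups with a struct ip_vs_conn_param filled once by
ip_vs_conn_fill_param() and passed to ip_vs_conn_in_get(),
ip_vs_conn_out_get() and ip_vs_conn_new(). Besides shortening the call
sites, the param struct gives the new persistence engines one place to hang
pe_data off a lookup key. A rough userspace analog of the refactor (field
types simplified; the real struct carries more state such as the PE data):

#include <stdio.h>
#include <stdint.h>

struct conn_param {
        int af;
        uint8_t protocol;
        uint32_t caddr, vaddr;          /* stand-ins for nf_inet_addr */
        uint16_t cport, vport;
};

static void conn_fill_param(int af, uint8_t proto,
                            uint32_t caddr, uint16_t cport,
                            uint32_t vaddr, uint16_t vport,
                            struct conn_param *p)
{
        p->af = af;
        p->protocol = proto;
        p->caddr = caddr;
        p->cport = cport;
        p->vaddr = vaddr;
        p->vport = vport;
}

static void conn_in_get(const struct conn_param *p)
{
        /* a real lookup hashes p and walks a bucket; just show the key */
        printf("lookup af=%d proto=%u %08x:%u -> %08x:%u\n",
               p->af, (unsigned)p->protocol,
               (unsigned)p->caddr, (unsigned)p->cport,
               (unsigned)p->vaddr, (unsigned)p->vport);
}

int main(void)
{
        struct conn_param p;

        conn_fill_param(2 /* AF_INET */, 6 /* IPPROTO_TCP */,
                        0x0a000001, 49152, 0x0a0000fe, 21, &p);
        conn_in_get(&p);
        return 0;
}
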
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
new file mode 100644
index 000000000000..4680647cd450
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -0,0 +1,292 @@
1/*
2 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
3 *
4 * Portions Copyright (C) 2001-2002
5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
6 *
7 * Portions Copyright (C) 2003-2010
8 * Julian Anastasov
9 *
10 *
11 * This code is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 *
26 * Authors:
27 * Ben North <ben@redfrontdoor.org>
28 * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
29 * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
30 *
31 *
32 * Current status:
33 *
34 * - provide conntrack confirmation for new and related connections; this
35 * way we can see their proper conntrack state in all hooks
36 * - support for all forwarding methods, not only NAT
37 * - FTP support (NAT), ability to support other NAT apps with expectations
38 * - to correctly create expectations for related NAT connections the proper
39 * NF conntrack support must already be installed, e.g. ip_vs_ftp requires
40 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
41 * NAT rules are needed)
42 * - alter reply for NAT when forwarding packet in original direction:
43 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
44 * when RELATED conntrack is created from real server (Active FTP DATA)
45 * - if iptables_nat is not loaded, Passive FTP will not work (the
46 * PASV response cannot be NAT-ed) but Active FTP should work
47 *
48 */
49
50#define KMSG_COMPONENT "IPVS"
51#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53#include <linux/module.h>
54#include <linux/types.h>
55#include <linux/kernel.h>
56#include <linux/errno.h>
57#include <linux/compiler.h>
58#include <linux/vmalloc.h>
59#include <linux/skbuff.h>
60#include <net/ip.h>
61#include <linux/netfilter.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ip_vs.h>
64#include <net/netfilter/nf_conntrack_core.h>
65#include <net/netfilter/nf_conntrack_expect.h>
66#include <net/netfilter/nf_conntrack_helper.h>
67#include <net/netfilter/nf_conntrack_zones.h>
68
69
70#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
71#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
72 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
73 (T)->dst.protonum
74
75#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
76#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
77 &((C)->vaddr.ip), ntohs((C)->vport), \
78 &((C)->daddr.ip), ntohs((C)->dport), \
79 (C)->protocol, (C)->state
80
81void
82ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
83{
84 enum ip_conntrack_info ctinfo;
85 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
86 struct nf_conntrack_tuple new_tuple;
87
88 if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
89 nf_ct_is_dying(ct))
90 return;
91
92 /* Never alter conntrack for non-NAT conns */
93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
94 return;
95
96 /* Alter reply only in original direction */
97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
98 return;
99
100 /*
101 * The connection is not yet in the hashtable, so we update it.
102 * CIP->VIP will remain the same, so leave the tuple in
103 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
104 * real server we will see RIP->DIP.
105 */
106 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
107 /*
108 * This will also take care of UDP and other protocols.
109 */
110 if (outin) {
111 new_tuple.src.u3 = cp->daddr;
112 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
113 new_tuple.dst.protonum != IPPROTO_ICMPV6)
114 new_tuple.src.u.tcp.port = cp->dport;
115 } else {
116 new_tuple.dst.u3 = cp->vaddr;
117 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
118 new_tuple.dst.protonum != IPPROTO_ICMPV6)
119 new_tuple.dst.u.tcp.port = cp->vport;
120 }
121 IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
122 "ctinfo=%d, old reply=" FMT_TUPLE
123 ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
124 __func__, ct, ct->status, ctinfo,
125 ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
126 ARG_TUPLE(&new_tuple), ARG_CONN(cp));
127 nf_conntrack_alter_reply(ct, &new_tuple);
128}
129
130int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
131{
132 return nf_conntrack_confirm(skb);
133}
134
135/*
136 * Called from init_conntrack() as expectfn handler.
137 */
138static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
139 struct nf_conntrack_expect *exp)
140{
141 struct nf_conntrack_tuple *orig, new_reply;
142 struct ip_vs_conn *cp;
143 struct ip_vs_conn_param p;
144
145 if (exp->tuple.src.l3num != PF_INET)
146 return;
147
148 /*
149 * We assume that no NF locks are held before this callback.
150 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
151 * expectations even if they use wildcard values, now we provide the
152 * actual values from the newly created original conntrack direction.
153 * The conntrack is confirmed when packet reaches IPVS hooks.
154 */
155
156 /* RS->CLIENT */
157 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
158 ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
159 &orig->src.u3, orig->src.u.tcp.port,
160 &orig->dst.u3, orig->dst.u.tcp.port, &p);
161 cp = ip_vs_conn_out_get(&p);
162 if (cp) {
163 /* Change reply CLIENT->RS to CLIENT->VS */
164 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
165 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
166 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
167 __func__, ct, ct->status,
168 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
169 ARG_CONN(cp));
170 new_reply.dst.u3 = cp->vaddr;
171 new_reply.dst.u.tcp.port = cp->vport;
172 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
173 ", inout cp=" FMT_CONN "\n",
174 __func__, ct,
175 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
176 ARG_CONN(cp));
177 goto alter;
178 }
179
180 /* CLIENT->VS */
181 cp = ip_vs_conn_in_get(&p);
182 if (cp) {
183 /* Change reply VS->CLIENT to RS->CLIENT */
184 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
185 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
186 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
187 __func__, ct, ct->status,
188 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
189 ARG_CONN(cp));
190 new_reply.src.u3 = cp->daddr;
191 new_reply.src.u.tcp.port = cp->dport;
192 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
193 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
194 __func__, ct,
195 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
196 ARG_CONN(cp));
197 goto alter;
198 }
199
200 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
201 " - unknown expect\n",
202 __func__, ct, ct->status, ARG_TUPLE(orig));
203 return;
204
205alter:
206 /* Never alter conntrack for non-NAT conns */
207 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
208 nf_conntrack_alter_reply(ct, &new_reply);
209 ip_vs_conn_put(cp);
210 return;
211}
212
213/*
214 * Create NF conntrack expectation with wildcard (optional) source port.
215 * Then the default callback function will alter the reply and will confirm
216 * the conntrack entry when the first packet comes.
217 * Use port 0 to expect connection from any port.
218 */
219void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
220 struct ip_vs_conn *cp, u_int8_t proto,
221 const __be16 port, int from_rs)
222{
223 struct nf_conntrack_expect *exp;
224
225 if (ct == NULL || nf_ct_is_untracked(ct))
226 return;
227
228 exp = nf_ct_expect_alloc(ct);
229 if (!exp)
230 return;
231
232 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
233 from_rs ? &cp->daddr : &cp->caddr,
234 from_rs ? &cp->caddr : &cp->vaddr,
235 proto, port ? &port : NULL,
236 from_rs ? &cp->cport : &cp->vport);
237
238 exp->expectfn = ip_vs_nfct_expect_callback;
239
240 IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
241 __func__, ct, ARG_TUPLE(&exp->tuple));
242 nf_ct_expect_related(exp);
243 nf_ct_expect_put(exp);
244}
245EXPORT_SYMBOL(ip_vs_nfct_expect_related);
246
247/*
248 * Our connection was terminated, try to drop the conntrack immediately
249 */
250void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
251{
252 struct nf_conntrack_tuple_hash *h;
253 struct nf_conn *ct;
254 struct nf_conntrack_tuple tuple;
255
256 if (!cp->cport)
257 return;
258
259 tuple = (struct nf_conntrack_tuple) {
260 .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
261 tuple.src.u3 = cp->caddr;
262 tuple.src.u.all = cp->cport;
263 tuple.src.l3num = cp->af;
264 tuple.dst.u3 = cp->vaddr;
265 tuple.dst.u.all = cp->vport;
266
267 IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
268 " for conn " FMT_CONN "\n",
269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
270
271 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
272 if (h) {
273 ct = nf_ct_tuplehash_to_ctrack(h);
274 /* Show what happens instead of calling nf_ct_kill() */
275 if (del_timer(&ct->timeout)) {
276 IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
277 FMT_TUPLE "\n",
278 __func__, ct, ARG_TUPLE(&tuple));
279 if (ct->timeout.function)
280 ct->timeout.function(ct->timeout.data);
281 } else {
282 IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
283 FMT_TUPLE "\n",
284 __func__, ct, ARG_TUPLE(&tuple));
285 }
286 nf_ct_put(ct);
287 } else {
288 IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
289 __func__, ARG_TUPLE(&tuple));
290 }
291}
292
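
The direction handling in ip_vs_update_conntrack() above is easy to get
backwards, so here is a toy model of the reply-tuple rewrite with plain
integers standing in for union nf_inet_addr (addresses invented for
illustration): in the outin case the reply's source is rewritten to the
real server (cp->daddr/cp->dport), otherwise the reply's destination is
pointed back at the virtual service (cp->vaddr/cp->vport).

#include <stdio.h>
#include <stdint.h>

struct tuple { uint32_t saddr, daddr; uint16_t sport, dport; };

/* mirrors the outin branch of ip_vs_update_conntrack(): rewrite only
 * the side of the reply tuple that NAT changes */
static void update_reply(struct tuple *reply, int outin,
                         uint32_t vaddr, uint16_t vport,
                         uint32_t daddr, uint16_t dport)
{
        if (outin) {
                reply->saddr = daddr;   /* reply will come from the RIP */
                reply->sport = dport;
        } else {
                reply->daddr = vaddr;   /* reply must target the VIP */
                reply->dport = vport;
        }
}

int main(void)
{
        /* toy values: VIP=2.2.2.2:80, RIP=3.3.3.3:8080 */
        struct tuple reply = { .saddr = 0x02020202, .daddr = 0x01010101,
                               .sport = 80, .dport = 40000 };

        update_reply(&reply, 1, 0x02020202, 80, 0x03030303, 8080);
        printf("reply now %08x:%u -> %08x:%u\n",
               (unsigned)reply.saddr, (unsigned)reply.sport,
               (unsigned)reply.daddr, (unsigned)reply.dport);
        return 0;
}
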
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
new file mode 100644
index 000000000000..3414af70ee12
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -0,0 +1,147 @@
1#define KMSG_COMPONENT "IPVS"
2#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
3
4#include <linux/module.h>
5#include <linux/spinlock.h>
6#include <linux/interrupt.h>
7#include <asm/string.h>
8#include <linux/kmod.h>
9#include <linux/sysctl.h>
10
11#include <net/ip_vs.h>
12
13/* IPVS pe list */
14static LIST_HEAD(ip_vs_pe);
15
16/* lock for the pe list */
17static DEFINE_SPINLOCK(ip_vs_pe_lock);
18
19/* Bind a service with a pe */
20void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
21{
22 svc->pe = pe;
23}
24
25/* Unbind a service from its pe */
26void ip_vs_unbind_pe(struct ip_vs_service *svc)
27{
28 svc->pe = NULL;
29}
30
31/* Get pe in the pe list by name */
32static struct ip_vs_pe *
33ip_vs_pe_getbyname(const char *pe_name)
34{
35 struct ip_vs_pe *pe;
36
37 IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
38 pe_name);
39
40 spin_lock_bh(&ip_vs_pe_lock);
41
42 list_for_each_entry(pe, &ip_vs_pe, n_list) {
43 /* Test and get the modules atomically */
44 if (pe->module &&
45 !try_module_get(pe->module)) {
46 /* This pe is just deleted */
47 continue;
48 }
49 if (strcmp(pe_name, pe->name) == 0) {
50 /* HIT */
51 spin_unlock_bh(&ip_vs_pe_lock);
52 return pe;
53 }
54 if (pe->module)
55 module_put(pe->module);
56 }
57
58 spin_unlock_bh(&ip_vs_pe_lock);
59 return NULL;
60}
61
62/* Lookup pe and try to load it if it doesn't exist */
63struct ip_vs_pe *ip_vs_pe_get(const char *name)
64{
65 struct ip_vs_pe *pe;
66
67 /* Search for the pe by name */
68 pe = ip_vs_pe_getbyname(name);
69
70 /* If pe not found, load the module and search again */
71 if (!pe) {
72 request_module("ip_vs_pe_%s", name);
73 pe = ip_vs_pe_getbyname(name);
74 }
75
76 return pe;
77}
78
79void ip_vs_pe_put(struct ip_vs_pe *pe)
80{
81 if (pe && pe->module)
82 module_put(pe->module);
83}
84
85/* Register a pe in the pe list */
86int register_ip_vs_pe(struct ip_vs_pe *pe)
87{
88 struct ip_vs_pe *tmp;
89
90 /* increase the module use count */
91 ip_vs_use_count_inc();
92
93 spin_lock_bh(&ip_vs_pe_lock);
94
95 if (!list_empty(&pe->n_list)) {
96 spin_unlock_bh(&ip_vs_pe_lock);
97 ip_vs_use_count_dec();
98 pr_err("%s(): [%s] pe already linked\n",
99 __func__, pe->name);
100 return -EINVAL;
101 }
102
103 /* Make sure that the pe with this name doesn't exist
104 * in the pe list.
105 */
106 list_for_each_entry(tmp, &ip_vs_pe, n_list) {
107 if (strcmp(tmp->name, pe->name) == 0) {
108 spin_unlock_bh(&ip_vs_pe_lock);
109 ip_vs_use_count_dec();
110 pr_err("%s(): [%s] pe already exists "
111 "in the system\n", __func__, pe->name);
112 return -EINVAL;
113 }
114 }
115 /* Add it into the d-linked pe list */
116 list_add(&pe->n_list, &ip_vs_pe);
117 spin_unlock_bh(&ip_vs_pe_lock);
118
119 pr_info("[%s] pe registered.\n", pe->name);
120
121 return 0;
122}
123EXPORT_SYMBOL_GPL(register_ip_vs_pe);
124
125/* Unregister a pe from the pe list */
126int unregister_ip_vs_pe(struct ip_vs_pe *pe)
127{
128 spin_lock_bh(&ip_vs_pe_lock);
129 if (list_empty(&pe->n_list)) {
130 spin_unlock_bh(&ip_vs_pe_lock);
131 pr_err("%s(): [%s] pe is not in the list. failed\n",
132 __func__, pe->name);
133 return -EINVAL;
134 }
135
136 /* Remove it from the d-linked pe list */
137 list_del(&pe->n_list);
138 spin_unlock_bh(&ip_vs_pe_lock);
139
140 /* decrease the module use count */
141 ip_vs_use_count_dec();
142
143 pr_info("[%s] pe unregistered.\n", pe->name);
144
145 return 0;
146}
147EXPORT_SYMBOL_GPL(unregister_ip_vs_pe);
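
ip_vs_pe_sip.c below is the first in-tree user of this registration API.
Stripped to the bones, a persistence engine is a named ops table registered
at module init; a hypothetical engine "foo" would look roughly like the
sketch below (stub callbacks, modelled directly on the sip PE that follows).
Since ip_vs_pe_get() autoloads with request_module("ip_vs_pe_%s", name),
such a module must be named ip_vs_pe_foo for autoloading to work.

#include <linux/module.h>
#include <linux/jhash.h>
#include <linux/string.h>
#include <net/ip_vs.h>

static int ip_vs_foo_fill_param(struct ip_vs_conn_param *p,
                                struct sk_buff *skb)
{
        /* a real PE extracts its key here; an error return falls
         * back to the default persistence logic */
        return -EINVAL;
}

static bool ip_vs_foo_ct_match(const struct ip_vs_conn_param *p,
                               struct ip_vs_conn *ct)
{
        return ct->pe_data && ct->pe_data_len == p->pe_data_len &&
               !memcmp(ct->pe_data, p->pe_data, p->pe_data_len);
}

static u32 ip_vs_foo_hashkey_raw(const struct ip_vs_conn_param *p,
                                 u32 initval, bool inverse)
{
        return jhash(p->pe_data, p->pe_data_len, initval);
}

static struct ip_vs_pe ip_vs_foo_pe = {
        .name           = "foo",
        .refcnt         = ATOMIC_INIT(0),
        .module         = THIS_MODULE,
        .n_list         = LIST_HEAD_INIT(ip_vs_foo_pe.n_list),
        .fill_param     = ip_vs_foo_fill_param,
        .ct_match       = ip_vs_foo_ct_match,
        .hashkey_raw    = ip_vs_foo_hashkey_raw,
};

static int __init ip_vs_foo_init(void)
{
        return register_ip_vs_pe(&ip_vs_foo_pe);
}

static void __exit ip_vs_foo_cleanup(void)
{
        unregister_ip_vs_pe(&ip_vs_foo_pe);
}

module_init(ip_vs_foo_init);
module_exit(ip_vs_foo_cleanup);
MODULE_LICENSE("GPL");
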
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
new file mode 100644
index 000000000000..b8b4e9620f3e
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -0,0 +1,169 @@
1#define KMSG_COMPONENT "IPVS"
2#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
3
4#include <linux/module.h>
5#include <linux/kernel.h>
6
7#include <net/ip_vs.h>
8#include <net/netfilter/nf_conntrack.h>
9#include <linux/netfilter/nf_conntrack_sip.h>
10
11#ifdef CONFIG_IP_VS_DEBUG
12static const char *ip_vs_dbg_callid(char *buf, size_t buf_len,
13 const char *callid, size_t callid_len,
14 int *idx)
15{
16 size_t len = min(min(callid_len, (size_t)64), buf_len - *idx - 1);
17 memcpy(buf + *idx, callid, len);
18 buf[*idx+len] = '\0';
19 *idx += len + 1;
20 return buf + *idx - len - 1;
21}
22
23#define IP_VS_DEBUG_CALLID(callid, len) \
24 ip_vs_dbg_callid(ip_vs_dbg_buf, sizeof(ip_vs_dbg_buf), \
25 callid, len, &ip_vs_dbg_idx)
26#endif
27
28static int get_callid(const char *dptr, unsigned int dataoff,
29 unsigned int datalen,
30 unsigned int *matchoff, unsigned int *matchlen)
31{
32 /* Find callid */
33 while (1) {
34 int ret = ct_sip_get_header(NULL, dptr, dataoff, datalen,
35 SIP_HDR_CALL_ID, matchoff,
36 matchlen);
37 if (ret > 0)
38 break;
39 if (!ret)
40 return -EINVAL;
41 dataoff += *matchoff;
42 }
43
44 /* Empty callid is useless */
45 if (!*matchlen)
46 return -EINVAL;
47
48 /* Too large is useless */
49 if (*matchlen > IP_VS_PEDATA_MAXLEN)
50 return -EINVAL;
51
52 /* SIP headers are always followed by a line terminator */
53 if (*matchoff + *matchlen == datalen)
54 return -EINVAL;
55
56 /* RFC 2543 allows lines to be terminated with CR, LF or CRLF,
57 * RFC 3261 allows only CRLF, we support both. */
58 if (*(dptr + *matchoff + *matchlen) != '\r' &&
59 *(dptr + *matchoff + *matchlen) != '\n')
60 return -EINVAL;
61
62 IP_VS_DBG_BUF(9, "SIP callid %s (%d bytes)\n",
63 IP_VS_DEBUG_CALLID(dptr + *matchoff, *matchlen),
64 *matchlen);
65 return 0;
66}
67
68static int
69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70{
71 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr;
74
75 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
76
77 /* Only useful with UDP */
78 if (iph.protocol != IPPROTO_UDP)
79 return -EINVAL;
80
81 /* No data? */
82 dataoff = iph.len + sizeof(struct udphdr);
83 if (dataoff >= skb->len)
84 return -EINVAL;
85
86 dptr = skb->data + dataoff;
87 datalen = skb->len - dataoff;
88
89 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
90 return -EINVAL;
91
92 p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
93 if (!p->pe_data)
94 return -ENOMEM;
95
96 /* N.B.: pe_data is only set on success,
97 * this allows fallback to the default persistence logic on failure
98 */
99 memcpy(p->pe_data, dptr + matchoff, matchlen);
100 p->pe_data_len = matchlen;
101
102 return 0;
103}
104
105static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,
106 struct ip_vs_conn *ct)
107
108{
109 bool ret = false;
110
111 if (ct->af == p->af &&
112 ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) &&
113 /* protocol should only be IPPROTO_IP if
114 * d_addr is a fwmark */
115 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
116 p->vaddr, &ct->vaddr) &&
117 ct->vport == p->vport &&
118 ct->flags & IP_VS_CONN_F_TEMPLATE &&
119 ct->protocol == p->protocol &&
120 ct->pe_data && ct->pe_data_len == p->pe_data_len &&
121 !memcmp(ct->pe_data, p->pe_data, p->pe_data_len))
122 ret = true;
123
124 IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n",
125 ip_vs_proto_name(p->protocol),
126 IP_VS_DEBUG_CALLID(p->pe_data, p->pe_data_len),
127 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
128 ret ? "hit" : "not hit");
129
130 return ret;
131}
132
133static u32 ip_vs_sip_hashkey_raw(const struct ip_vs_conn_param *p,
134 u32 initval, bool inverse)
135{
136 return jhash(p->pe_data, p->pe_data_len, initval);
137}
138
139static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
140{
141 memcpy(buf, cp->pe_data, cp->pe_data_len);
142 return cp->pe_data_len;
143}
144
145static struct ip_vs_pe ip_vs_sip_pe =
146{
147 .name = "sip",
148 .refcnt = ATOMIC_INIT(0),
149 .module = THIS_MODULE,
150 .n_list = LIST_HEAD_INIT(ip_vs_sip_pe.n_list),
151 .fill_param = ip_vs_sip_fill_param,
152 .ct_match = ip_vs_sip_ct_match,
153 .hashkey_raw = ip_vs_sip_hashkey_raw,
154 .show_pe_data = ip_vs_sip_show_pe_data,
155};
156
157static int __init ip_vs_sip_init(void)
158{
159 return register_ip_vs_pe(&ip_vs_sip_pe);
160}
161
162static void __exit ip_vs_sip_cleanup(void)
163{
164 unregister_ip_vs_pe(&ip_vs_sip_pe);
165}
166
167module_init(ip_vs_sip_init);
168module_exit(ip_vs_sip_cleanup);
169MODULE_LICENSE("GPL");
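
A quick userspace check of the terminator rule get_callid() enforces above
(the SIP fragment is made up; the rejection cases, minus the
IP_VS_PEDATA_MAXLEN size cap, match the -EINVAL paths in the kernel
function):

#include <stdio.h>
#include <string.h>

static int callid_ok(const char *buf, size_t buflen,
                     size_t off, size_t len)
{
        if (!len)                       /* empty callid is useless */
                return 0;
        if (off + len == buflen)        /* header must not end the buffer */
                return 0;
        /* RFC 2543 allows CR, LF or CRLF; RFC 3261 only CRLF */
        return buf[off + len] == '\r' || buf[off + len] == '\n';
}

int main(void)
{
        const char msg[] = "Call-ID: a84b4c76e66710@pc33.example.com\r\n";
        size_t off = strlen("Call-ID: ");
        size_t len = strlen("a84b4c76e66710@pc33.example.com");

        printf("callid %s\n",
               callid_ok(msg, strlen(msg), off, len) ? "ok" : "rejected");
        return 0;
}
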
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 027f654799fe..c53998390877 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -172,8 +172,8 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
172 else if (ih->frag_off & htons(IP_OFFSET)) 172 else if (ih->frag_off & htons(IP_OFFSET))
173 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 173 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
174 else { 174 else {
175 __be16 _ports[2], *pptr 175 __be16 _ports[2], *pptr;
176; 176
177 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 177 pptr = skb_header_pointer(skb, offset + ih->ihl*4,
178 sizeof(_ports), _ports); 178 sizeof(_ports), _ports);
179 if (pptr == NULL) 179 if (pptr == NULL)
@@ -223,13 +223,13 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
223 223
224 224
225void 225void
226ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, 226ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
227 const struct sk_buff *skb, 227 const struct sk_buff *skb,
228 int offset, 228 int offset,
229 const char *msg) 229 const char *msg)
230{ 230{
231#ifdef CONFIG_IP_VS_IPV6 231#ifdef CONFIG_IP_VS_IPV6
232 if (skb->protocol == htons(ETH_P_IPV6)) 232 if (af == AF_INET6)
233 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 233 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
234 else 234 else
235#endif 235#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 1892dfc12fdd..3a0461117d3f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -40,6 +40,19 @@ struct isakmp_hdr {
40 40
41#define PORT_ISAKMP 500 41#define PORT_ISAKMP 500
42 42
43static void
44ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
45 int inverse, struct ip_vs_conn_param *p)
46{
47 if (likely(!inverse))
48 ip_vs_conn_fill_param(af, IPPROTO_UDP,
49 &iph->saddr, htons(PORT_ISAKMP),
50 &iph->daddr, htons(PORT_ISAKMP), p);
51 else
52 ip_vs_conn_fill_param(af, IPPROTO_UDP,
53 &iph->daddr, htons(PORT_ISAKMP),
54 &iph->saddr, htons(PORT_ISAKMP), p);
55}
43 56
44static struct ip_vs_conn * 57static struct ip_vs_conn *
45ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 58ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -47,21 +60,10 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
47 int inverse) 60 int inverse)
48{ 61{
49 struct ip_vs_conn *cp; 62 struct ip_vs_conn *cp;
63 struct ip_vs_conn_param p;
50 64
51 if (likely(!inverse)) { 65 ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
52 cp = ip_vs_conn_in_get(af, IPPROTO_UDP, 66 cp = ip_vs_conn_in_get(&p);
53 &iph->saddr,
54 htons(PORT_ISAKMP),
55 &iph->daddr,
56 htons(PORT_ISAKMP));
57 } else {
58 cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
59 &iph->daddr,
60 htons(PORT_ISAKMP),
61 &iph->saddr,
62 htons(PORT_ISAKMP));
63 }
64
65 if (!cp) { 67 if (!cp) {
66 /* 68 /*
67 * We are not sure if the packet is from our 69 * We are not sure if the packet is from our
@@ -87,21 +89,10 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
87 int inverse) 89 int inverse)
88{ 90{
89 struct ip_vs_conn *cp; 91 struct ip_vs_conn *cp;
92 struct ip_vs_conn_param p;
90 93
91 if (likely(!inverse)) { 94 ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
92 cp = ip_vs_conn_out_get(af, IPPROTO_UDP, 95 cp = ip_vs_conn_out_get(&p);
93 &iph->saddr,
94 htons(PORT_ISAKMP),
95 &iph->daddr,
96 htons(PORT_ISAKMP));
97 } else {
98 cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
99 &iph->daddr,
100 htons(PORT_ISAKMP),
101 &iph->saddr,
102 htons(PORT_ISAKMP));
103 }
104
105 if (!cp) { 96 if (!cp) {
106 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " 97 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
107 "%s%s %s->%s\n", 98 "%s%s %s->%s\n",
@@ -126,54 +117,6 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
126 return 0; 117 return 0;
127} 118}
128 119
129
130static void
131ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
132 int offset, const char *msg)
133{
134 char buf[256];
135 struct iphdr _iph, *ih;
136
137 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
138 if (ih == NULL)
139 sprintf(buf, "TRUNCATED");
140 else
141 sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr);
142
143 pr_debug("%s: %s %s\n", msg, pp->name, buf);
144}
145
146#ifdef CONFIG_IP_VS_IPV6
147static void
148ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
149 int offset, const char *msg)
150{
151 char buf[256];
152 struct ipv6hdr _iph, *ih;
153
154 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
155 if (ih == NULL)
156 sprintf(buf, "TRUNCATED");
157 else
158 sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr);
159
160 pr_debug("%s: %s %s\n", msg, pp->name, buf);
161}
162#endif
163
164static void
165ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
166 int offset, const char *msg)
167{
168#ifdef CONFIG_IP_VS_IPV6
169 if (skb->protocol == htons(ETH_P_IPV6))
170 ah_esp_debug_packet_v6(pp, skb, offset, msg);
171 else
172#endif
173 ah_esp_debug_packet_v4(pp, skb, offset, msg);
174}
175
176
177static void ah_esp_init(struct ip_vs_protocol *pp) 120static void ah_esp_init(struct ip_vs_protocol *pp)
178{ 121{
179 /* nothing to do now */ 122 /* nothing to do now */
@@ -204,7 +147,7 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
204 .register_app = NULL, 147 .register_app = NULL,
205 .unregister_app = NULL, 148 .unregister_app = NULL,
206 .app_conn_bind = NULL, 149 .app_conn_bind = NULL,
207 .debug_packet = ah_esp_debug_packet, 150 .debug_packet = ip_vs_tcpudp_debug_packet,
208 .timeout_change = NULL, /* ISAKMP */ 151 .timeout_change = NULL, /* ISAKMP */
209 .set_state_timeout = NULL, 152 .set_state_timeout = NULL,
210}; 153};
@@ -228,7 +171,7 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
228 .register_app = NULL, 171 .register_app = NULL,
229 .unregister_app = NULL, 172 .unregister_app = NULL,
230 .app_conn_bind = NULL, 173 .app_conn_bind = NULL,
231 .debug_packet = ah_esp_debug_packet, 174 .debug_packet = ip_vs_tcpudp_debug_packet,
232 .timeout_change = NULL, /* ISAKMP */ 175 .timeout_change = NULL, /* ISAKMP */
233}; 176};
234#endif 177#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 4c0855cb006e..1ea96bcd342b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -31,6 +31,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
31 if ((sch->type == SCTP_CID_INIT) && 31 if ((sch->type == SCTP_CID_INIT) &&
32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, 32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
33 &iph.daddr, sh->dest))) { 33 &iph.daddr, sh->dest))) {
34 int ignored;
35
34 if (ip_vs_todrop()) { 36 if (ip_vs_todrop()) {
35 /* 37 /*
36 * It seems that we are very loaded. 38 * It seems that we are very loaded.
@@ -44,8 +46,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
44 * Let the virtual server select a real server for the 46 * Let the virtual server select a real server for the
45 * incoming connection, and create a connection entry. 47 * incoming connection, and create a connection entry.
46 */ 48 */
47 *cpp = ip_vs_schedule(svc, skb); 49 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
48 if (!*cpp) { 50 if (!*cpp && !ignored) {
49 *verdict = ip_vs_leave(svc, skb, pp); 51 *verdict = ip_vs_leave(svc, skb, pp);
50 return 0; 52 return 0;
51 } 53 }
@@ -61,6 +63,7 @@ sctp_snat_handler(struct sk_buff *skb,
61{ 63{
62 sctp_sctphdr_t *sctph; 64 sctp_sctphdr_t *sctph;
63 unsigned int sctphoff; 65 unsigned int sctphoff;
66 struct sk_buff *iter;
64 __be32 crc32; 67 __be32 crc32;
65 68
66#ifdef CONFIG_IP_VS_IPV6 69#ifdef CONFIG_IP_VS_IPV6
@@ -89,8 +92,8 @@ sctp_snat_handler(struct sk_buff *skb,
89 92
90 /* Calculate the checksum */ 93 /* Calculate the checksum */
91 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); 94 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
92 for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) 95 skb_walk_frags(skb, iter)
93 crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), 96 crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
94 crc32); 97 crc32);
95 crc32 = sctp_end_cksum(crc32); 98 crc32 = sctp_end_cksum(crc32);
96 sctph->checksum = crc32; 99 sctph->checksum = crc32;
@@ -102,9 +105,9 @@ static int
102sctp_dnat_handler(struct sk_buff *skb, 105sctp_dnat_handler(struct sk_buff *skb,
103 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 106 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
104{ 107{
105
106 sctp_sctphdr_t *sctph; 108 sctp_sctphdr_t *sctph;
107 unsigned int sctphoff; 109 unsigned int sctphoff;
110 struct sk_buff *iter;
108 __be32 crc32; 111 __be32 crc32;
109 112
110#ifdef CONFIG_IP_VS_IPV6 113#ifdef CONFIG_IP_VS_IPV6
@@ -133,8 +136,8 @@ sctp_dnat_handler(struct sk_buff *skb,
133 136
134 /* Calculate the checksum */ 137 /* Calculate the checksum */
135 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); 138 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
136 for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) 139 skb_walk_frags(skb, iter)
137 crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), 140 crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
138 crc32); 141 crc32);
139 crc32 = sctp_end_cksum(crc32); 142 crc32 = sctp_end_cksum(crc32);
140 sctph->checksum = crc32; 143 sctph->checksum = crc32;
@@ -145,9 +148,9 @@ sctp_dnat_handler(struct sk_buff *skb,
145static int 148static int
146sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 149sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
147{ 150{
148 struct sk_buff *list = skb_shinfo(skb)->frag_list;
149 unsigned int sctphoff; 151 unsigned int sctphoff;
150 struct sctphdr *sh, _sctph; 152 struct sctphdr *sh, _sctph;
153 struct sk_buff *iter;
151 __le32 cmp; 154 __le32 cmp;
152 __le32 val; 155 __le32 val;
153 __u32 tmp; 156 __u32 tmp;
@@ -166,15 +169,15 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
166 cmp = sh->checksum; 169 cmp = sh->checksum;
167 170
168 tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb)); 171 tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
169 for (; list; list = list->next) 172 skb_walk_frags(skb, iter)
170 tmp = sctp_update_cksum((__u8 *) list->data, 173 tmp = sctp_update_cksum((__u8 *) iter->data,
171 skb_headlen(list), tmp); 174 skb_headlen(iter), tmp);
172 175
173 val = sctp_end_cksum(tmp); 176 val = sctp_end_cksum(tmp);
174 177
175 if (val != cmp) { 178 if (val != cmp) {
176 /* CRC failure, dump it. */ 179 /* CRC failure, dump it. */
177 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 180 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
178 "Failed checksum for"); 181 "Failed checksum for");
179 return 0; 182 return 0;
180 } 183 }
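
The three sctp hunks replace open-coded frag_list walks with
skb_walk_frags(), but the checksum structure is unchanged: start a running
CRC over the linear head, fold each fragment in, finalize once at the end.
Below is a self-contained userspace analog using a bitwise CRC32c (SCTP's
Castagnoli polynomial; the kernel's sctp_*_cksum helpers additionally deal
with byte order, ignored here), showing that chunk-wise accumulation matches
a single pass over the same bytes:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* bitwise CRC32c, reflected form of polynomial 0x1EDC6F41 */
static uint32_t crc32c_update(uint32_t crc, const uint8_t *p, size_t n)
{
        while (n--) {
                crc ^= *p++;
                for (int i = 0; i < 8; i++)
                        crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
        }
        return crc;
}

int main(void)
{
        const char *frags[] = { "INIT chunk ", "in the head, ",
                                "rest in frags" };
        char whole[64] = "";
        uint32_t chunked = 0xFFFFFFFF;  /* sctp_start_cksum analog */

        for (int i = 0; i < 3; i++) {   /* skb_walk_frags() analog */
                chunked = crc32c_update(chunked, (const uint8_t *)frags[i],
                                        strlen(frags[i]));
                strcat(whole, frags[i]);
        }
        chunked = ~chunked;             /* sctp_end_cksum analog */

        uint32_t once = ~crc32c_update(0xFFFFFFFF, (const uint8_t *)whole,
                                       strlen(whole));
        printf("chunked=%08x whole=%08x\n",
               (unsigned)chunked, (unsigned)once);
        return 0;
}
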
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 282d24de8592..f6c5200e2146 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -43,9 +43,12 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
43 return 0; 43 return 0;
44 } 44 }
45 45
46 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
46 if (th->syn && 47 if (th->syn &&
47 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, 48 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
48 th->dest))) { 49 th->dest))) {
50 int ignored;
51
49 if (ip_vs_todrop()) { 52 if (ip_vs_todrop()) {
50 /* 53 /*
51 * It seems that we are very loaded. 54 * It seems that we are very loaded.
@@ -60,8 +63,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
60 * Let the virtual server select a real server for the 63 * Let the virtual server select a real server for the
61 * incoming connection, and create a connection entry. 64 * incoming connection, and create a connection entry.
62 */ 65 */
63 *cpp = ip_vs_schedule(svc, skb); 66 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
64 if (!*cpp) { 67 if (!*cpp && !ignored) {
65 *verdict = ip_vs_leave(svc, skb, pp); 68 *verdict = ip_vs_leave(svc, skb, pp);
66 return 0; 69 return 0;
67 } 70 }
@@ -101,15 +104,15 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
101#ifdef CONFIG_IP_VS_IPV6 104#ifdef CONFIG_IP_VS_IPV6
102 if (af == AF_INET6) 105 if (af == AF_INET6)
103 tcph->check = 106 tcph->check =
104 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, 107 ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
105 ip_vs_check_diff2(oldlen, newlen, 108 ip_vs_check_diff2(oldlen, newlen,
106 ~csum_unfold(tcph->check)))); 109 csum_unfold(tcph->check))));
107 else 110 else
108#endif 111#endif
109 tcph->check = 112 tcph->check =
110 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, 113 ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
111 ip_vs_check_diff2(oldlen, newlen, 114 ip_vs_check_diff2(oldlen, newlen,
112 ~csum_unfold(tcph->check)))); 115 csum_unfold(tcph->check))));
113} 116}
114 117
115 118
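
The complement moves from the input of the fold to its output here because, for CHECKSUM_PARTIAL, the transport header stores the pseudo-header sum in the opposite complement state from a finalized checksum. The ip_vs_check_diff*() helpers themselves implement RFC 1624-style incremental updates, HC' = ~(~HC + ~m + m'). A self-contained demonstration of that arithmetic (field values are arbitrary):

    #include <stdint.h>
    #include <stdio.h>

    /* One's-complement fold of a 32-bit accumulator into 16 bits. */
    static uint16_t fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xFFFF) + (sum >> 16);
        return (uint16_t)sum;
    }

    /* Full checksum over 16-bit words: complement of the folded sum. */
    static uint16_t csum(const uint16_t *w, int n)
    {
        uint32_t sum = 0;
        for (int i = 0; i < n; i++)
            sum += w[i];
        return (uint16_t)~fold(sum);
    }

    /* RFC 1624 incremental update: HC' = ~(~HC + ~m + m'). */
    static uint16_t csum_update(uint16_t hc, uint16_t m, uint16_t m_new)
    {
        uint32_t sum = (uint16_t)~hc;
        sum += (uint16_t)~m;
        sum += m_new;
        return (uint16_t)~fold(sum);
    }

    int main(void)
    {
        uint16_t pkt[4] = { 0x4500, 0x0054, 0xc0a8, 0x0001 };
        uint16_t hc = csum(pkt, 4);
        uint16_t old = pkt[3];

        pkt[3] = 0x00fe;                /* mangle one 16-bit field */
        printf("incremental=0x%04x full=0x%04x\n",
               csum_update(hc, old, pkt[3]), csum(pkt, 4));
        return 0;
    }

Both values come out identical (0xf904 for these inputs), which is what lets the NAT path avoid re-summing the whole segment when only addresses and ports change.
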
@@ -120,6 +123,7 @@ tcp_snat_handler(struct sk_buff *skb,
120 struct tcphdr *tcph; 123 struct tcphdr *tcph;
121 unsigned int tcphoff; 124 unsigned int tcphoff;
122 int oldlen; 125 int oldlen;
126 int payload_csum = 0;
123 127
124#ifdef CONFIG_IP_VS_IPV6 128#ifdef CONFIG_IP_VS_IPV6
125 if (cp->af == AF_INET6) 129 if (cp->af == AF_INET6)
@@ -134,13 +138,20 @@ tcp_snat_handler(struct sk_buff *skb,
134 return 0; 138 return 0;
135 139
136 if (unlikely(cp->app != NULL)) { 140 if (unlikely(cp->app != NULL)) {
141 int ret;
142
137 /* Some checks before mangling */ 143 /* Some checks before mangling */
138 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 144 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
139 return 0; 145 return 0;
140 146
141 /* Call application helper if needed */ 147 /* Call application helper if needed */
142 if (!ip_vs_app_pkt_out(cp, skb)) 148 if (!(ret = ip_vs_app_pkt_out(cp, skb)))
143 return 0; 149 return 0;
150 /* ret=2: csum update is needed after payload mangling */
151 if (ret == 1)
152 oldlen = skb->len - tcphoff;
153 else
154 payload_csum = 1;
144 } 155 }
145 156
146 tcph = (void *)skb_network_header(skb) + tcphoff; 157 tcph = (void *)skb_network_header(skb) + tcphoff;
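
The new payload_csum flag distinguishes two helper outcomes: ret == 1 means the application helper left the payload checksum-intact, so oldlen is re-snapshotted and the fast incremental update stays valid; ret == 2 means the payload was mangled and only a full recalculation is safe. A small illustrative model of that dispatch (names are ours, not the kernel's):

    #include <stdio.h>

    enum csum_strategy { CSUM_FAST_UPDATE, CSUM_FULL_RECALC };

    /* app_ret stands in for ip_vs_app_pkt_out()/_in(): 0 = drop the
     * packet, 1 = payload untouched, 2 = payload mangled. */
    static enum csum_strategy after_helper(int app_ret, int *oldlen, int curlen)
    {
        if (app_ret == 1) {
            *oldlen = curlen;        /* re-snapshot; incremental math holds */
            return CSUM_FAST_UPDATE;
        }
        return CSUM_FULL_RECALC;     /* app_ret == 2 */
    }

    int main(void)
    {
        int oldlen = 1400;

        printf("%d\n", after_helper(1, &oldlen, 1400)); /* 0: fast update */
        printf("%d\n", after_helper(2, &oldlen, 1412)); /* 1: full recalc */
        return 0;
    }
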
@@ -151,12 +162,13 @@ tcp_snat_handler(struct sk_buff *skb,
151 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 162 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
152 htons(oldlen), 163 htons(oldlen),
153 htons(skb->len - tcphoff)); 164 htons(skb->len - tcphoff));
154 } else if (!cp->app) { 165 } else if (!payload_csum) {
155 /* Only port and addr are changed, do fast csum update */ 166 /* Only port and addr are changed, do fast csum update */
156 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 167 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
157 cp->dport, cp->vport); 168 cp->dport, cp->vport);
158 if (skb->ip_summed == CHECKSUM_COMPLETE) 169 if (skb->ip_summed == CHECKSUM_COMPLETE)
159 skb->ip_summed = CHECKSUM_NONE; 170 skb->ip_summed = (cp->app && pp->csum_check) ?
171 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
160 } else { 172 } else {
161 /* full checksum calculation */ 173 /* full checksum calculation */
162 tcph->check = 0; 174 tcph->check = 0;
@@ -174,6 +186,7 @@ tcp_snat_handler(struct sk_buff *skb,
174 skb->len - tcphoff, 186 skb->len - tcphoff,
175 cp->protocol, 187 cp->protocol,
176 skb->csum); 188 skb->csum);
189 skb->ip_summed = CHECKSUM_UNNECESSARY;
177 190
178 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 191 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
179 pp->name, tcph->check, 192 pp->name, tcph->check,
@@ -190,6 +203,7 @@ tcp_dnat_handler(struct sk_buff *skb,
190 struct tcphdr *tcph; 203 struct tcphdr *tcph;
191 unsigned int tcphoff; 204 unsigned int tcphoff;
192 int oldlen; 205 int oldlen;
206 int payload_csum = 0;
193 207
194#ifdef CONFIG_IP_VS_IPV6 208#ifdef CONFIG_IP_VS_IPV6
195 if (cp->af == AF_INET6) 209 if (cp->af == AF_INET6)
@@ -204,6 +218,8 @@ tcp_dnat_handler(struct sk_buff *skb,
204 return 0; 218 return 0;
205 219
206 if (unlikely(cp->app != NULL)) { 220 if (unlikely(cp->app != NULL)) {
221 int ret;
222
207 /* Some checks before mangling */ 223 /* Some checks before mangling */
208 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 224 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
209 return 0; 225 return 0;
@@ -212,8 +228,13 @@ tcp_dnat_handler(struct sk_buff *skb,
212 * Attempt ip_vs_app call. 228 * Attempt ip_vs_app call.
213 * It will fix ip_vs_conn and iph ack_seq stuff 229 * It will fix ip_vs_conn and iph ack_seq stuff
214 */ 230 */
215 if (!ip_vs_app_pkt_in(cp, skb)) 231 if (!(ret = ip_vs_app_pkt_in(cp, skb)))
216 return 0; 232 return 0;
233 /* ret=2: csum update is needed after payload mangling */
234 if (ret == 1)
235 oldlen = skb->len - tcphoff;
236 else
237 payload_csum = 1;
217 } 238 }
218 239
219 tcph = (void *)skb_network_header(skb) + tcphoff; 240 tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -223,15 +244,16 @@ tcp_dnat_handler(struct sk_buff *skb,
223 * Adjust TCP checksums 244 * Adjust TCP checksums
224 */ 245 */
225 if (skb->ip_summed == CHECKSUM_PARTIAL) { 246 if (skb->ip_summed == CHECKSUM_PARTIAL) {
226 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 247 tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
227 htons(oldlen), 248 htons(oldlen),
228 htons(skb->len - tcphoff)); 249 htons(skb->len - tcphoff));
229 } else if (!cp->app) { 250 } else if (!payload_csum) {
230 /* Only port and addr are changed, do fast csum update */ 251 /* Only port and addr are changed, do fast csum update */
231 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, 252 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
232 cp->vport, cp->dport); 253 cp->vport, cp->dport);
233 if (skb->ip_summed == CHECKSUM_COMPLETE) 254 if (skb->ip_summed == CHECKSUM_COMPLETE)
234 skb->ip_summed = CHECKSUM_NONE; 255 skb->ip_summed = (cp->app && pp->csum_check) ?
256 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
235 } else { 257 } else {
236 /* full checksum calculation */ 258 /* full checksum calculation */
237 tcph->check = 0; 259 tcph->check = 0;
@@ -278,7 +300,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
278 skb->len - tcphoff, 300 skb->len - tcphoff,
279 ipv6_hdr(skb)->nexthdr, 301 ipv6_hdr(skb)->nexthdr,
280 skb->csum)) { 302 skb->csum)) {
281 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 303 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
282 "Failed checksum for"); 304 "Failed checksum for");
283 return 0; 305 return 0;
284 } 306 }
@@ -289,7 +311,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
289 skb->len - tcphoff, 311 skb->len - tcphoff,
290 ip_hdr(skb)->protocol, 312 ip_hdr(skb)->protocol,
291 skb->csum)) { 313 skb->csum)) {
292 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 314 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
293 "Failed checksum for"); 315 "Failed checksum for");
294 return 0; 316 return 0;
295 } 317 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 8553231b5d41..9d106a06bb0a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -46,6 +46,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
46 svc = ip_vs_service_get(af, skb->mark, iph.protocol, 46 svc = ip_vs_service_get(af, skb->mark, iph.protocol,
47 &iph.daddr, uh->dest); 47 &iph.daddr, uh->dest);
48 if (svc) { 48 if (svc) {
49 int ignored;
50
49 if (ip_vs_todrop()) { 51 if (ip_vs_todrop()) {
50 /* 52 /*
51 * It seems that we are very loaded. 53 * It seems that we are very loaded.
@@ -60,8 +62,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
60 * Let the virtual server select a real server for the 62 * Let the virtual server select a real server for the
61 * incoming connection, and create a connection entry. 63 * incoming connection, and create a connection entry.
62 */ 64 */
63 *cpp = ip_vs_schedule(svc, skb); 65 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
64 if (!*cpp) { 66 if (!*cpp && !ignored) {
65 *verdict = ip_vs_leave(svc, skb, pp); 67 *verdict = ip_vs_leave(svc, skb, pp);
66 return 0; 68 return 0;
67 } 69 }
@@ -102,15 +104,15 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
102#ifdef CONFIG_IP_VS_IPV6 104#ifdef CONFIG_IP_VS_IPV6
103 if (af == AF_INET6) 105 if (af == AF_INET6)
104 uhdr->check = 106 uhdr->check =
105 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, 107 ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
106 ip_vs_check_diff2(oldlen, newlen, 108 ip_vs_check_diff2(oldlen, newlen,
107 ~csum_unfold(uhdr->check)))); 109 csum_unfold(uhdr->check))));
108 else 110 else
109#endif 111#endif
110 uhdr->check = 112 uhdr->check =
111 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, 113 ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
112 ip_vs_check_diff2(oldlen, newlen, 114 ip_vs_check_diff2(oldlen, newlen,
113 ~csum_unfold(uhdr->check)))); 115 csum_unfold(uhdr->check))));
114} 116}
115 117
116 118
@@ -121,6 +123,7 @@ udp_snat_handler(struct sk_buff *skb,
121 struct udphdr *udph; 123 struct udphdr *udph;
122 unsigned int udphoff; 124 unsigned int udphoff;
123 int oldlen; 125 int oldlen;
126 int payload_csum = 0;
124 127
125#ifdef CONFIG_IP_VS_IPV6 128#ifdef CONFIG_IP_VS_IPV6
126 if (cp->af == AF_INET6) 129 if (cp->af == AF_INET6)
@@ -135,6 +138,8 @@ udp_snat_handler(struct sk_buff *skb,
135 return 0; 138 return 0;
136 139
137 if (unlikely(cp->app != NULL)) { 140 if (unlikely(cp->app != NULL)) {
141 int ret;
142
138 /* Some checks before mangling */ 143 /* Some checks before mangling */
139 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 144 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
140 return 0; 145 return 0;
@@ -142,8 +147,13 @@ udp_snat_handler(struct sk_buff *skb,
142 /* 147 /*
143 * Call application helper if needed 148 * Call application helper if needed
144 */ 149 */
145 if (!ip_vs_app_pkt_out(cp, skb)) 150 if (!(ret = ip_vs_app_pkt_out(cp, skb)))
146 return 0; 151 return 0;
152 /* ret=2: csum update is needed after payload mangling */
153 if (ret == 1)
154 oldlen = skb->len - udphoff;
155 else
156 payload_csum = 1;
147 } 157 }
148 158
149 udph = (void *)skb_network_header(skb) + udphoff; 159 udph = (void *)skb_network_header(skb) + udphoff;
@@ -156,12 +166,13 @@ udp_snat_handler(struct sk_buff *skb,
156 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 166 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
157 htons(oldlen), 167 htons(oldlen),
158 htons(skb->len - udphoff)); 168 htons(skb->len - udphoff));
159 } else if (!cp->app && (udph->check != 0)) { 169 } else if (!payload_csum && (udph->check != 0)) {
160 /* Only port and addr are changed, do fast csum update */ 170 /* Only port and addr are changed, do fast csum update */
161 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 171 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
162 cp->dport, cp->vport); 172 cp->dport, cp->vport);
163 if (skb->ip_summed == CHECKSUM_COMPLETE) 173 if (skb->ip_summed == CHECKSUM_COMPLETE)
164 skb->ip_summed = CHECKSUM_NONE; 174 skb->ip_summed = (cp->app && pp->csum_check) ?
175 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
165 } else { 176 } else {
166 /* full checksum calculation */ 177 /* full checksum calculation */
167 udph->check = 0; 178 udph->check = 0;
@@ -181,6 +192,7 @@ udp_snat_handler(struct sk_buff *skb,
181 skb->csum); 192 skb->csum);
182 if (udph->check == 0) 193 if (udph->check == 0)
183 udph->check = CSUM_MANGLED_0; 194 udph->check = CSUM_MANGLED_0;
195 skb->ip_summed = CHECKSUM_UNNECESSARY;
184 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 196 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
185 pp->name, udph->check, 197 pp->name, udph->check,
186 (char*)&(udph->check) - (char*)udph); 198 (char*)&(udph->check) - (char*)udph);
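
The CSUM_MANGLED_0 substitution above exists because a wire checksum of 0 on IPv4 UDP means "no checksum computed", so a result that genuinely folds to zero must be transmitted as 0xFFFF, the alternate one's-complement representation of zero. A minimal sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* Fold and complement, then map a zero result to 0xFFFF -- the
     * kernel's CSUM_MANGLED_0 -- since 0 on the wire means "no UDP
     * checksum" for IPv4. */
    static uint16_t udp_wire_csum(uint32_t sum)
    {
        uint16_t check;

        while (sum >> 16)
            sum = (sum & 0xFFFF) + (sum >> 16);
        check = (uint16_t)~sum;
        return check ? check : 0xFFFF;
    }

    int main(void)
    {
        /* A datagram whose one's-complement sum is all-ones folds to a
         * checksum of 0 and must be sent as 0xFFFF instead. */
        printf("0x%04x\n", udp_wire_csum(0xFFFF));  /* prints 0xffff */
        printf("0x%04x\n", udp_wire_csum(0x1234));  /* ordinary case */
        return 0;
    }
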
@@ -196,6 +208,7 @@ udp_dnat_handler(struct sk_buff *skb,
196 struct udphdr *udph; 208 struct udphdr *udph;
197 unsigned int udphoff; 209 unsigned int udphoff;
198 int oldlen; 210 int oldlen;
211 int payload_csum = 0;
199 212
200#ifdef CONFIG_IP_VS_IPV6 213#ifdef CONFIG_IP_VS_IPV6
201 if (cp->af == AF_INET6) 214 if (cp->af == AF_INET6)
@@ -210,6 +223,8 @@ udp_dnat_handler(struct sk_buff *skb,
210 return 0; 223 return 0;
211 224
212 if (unlikely(cp->app != NULL)) { 225 if (unlikely(cp->app != NULL)) {
226 int ret;
227
213 /* Some checks before mangling */ 228 /* Some checks before mangling */
214 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 229 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
215 return 0; 230 return 0;
@@ -218,8 +233,13 @@ udp_dnat_handler(struct sk_buff *skb,
218 * Attempt ip_vs_app call. 233 * Attempt ip_vs_app call.
219 * It will fix ip_vs_conn 234 * It will fix ip_vs_conn
220 */ 235 */
221 if (!ip_vs_app_pkt_in(cp, skb)) 236 if (!(ret = ip_vs_app_pkt_in(cp, skb)))
222 return 0; 237 return 0;
238 /* ret=2: csum update is needed after payload mangling */
239 if (ret == 1)
240 oldlen = skb->len - udphoff;
241 else
242 payload_csum = 1;
223 } 243 }
224 244
225 udph = (void *)skb_network_header(skb) + udphoff; 245 udph = (void *)skb_network_header(skb) + udphoff;
@@ -229,15 +249,16 @@ udp_dnat_handler(struct sk_buff *skb,
229 * Adjust UDP checksums 249 * Adjust UDP checksums
230 */ 250 */
231 if (skb->ip_summed == CHECKSUM_PARTIAL) { 251 if (skb->ip_summed == CHECKSUM_PARTIAL) {
232 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 252 udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
233 htons(oldlen), 253 htons(oldlen),
234 htons(skb->len - udphoff)); 254 htons(skb->len - udphoff));
235 } else if (!cp->app && (udph->check != 0)) { 255 } else if (!payload_csum && (udph->check != 0)) {
236 /* Only port and addr are changed, do fast csum update */ 256 /* Only port and addr are changed, do fast csum update */
237 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, 257 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
238 cp->vport, cp->dport); 258 cp->vport, cp->dport);
239 if (skb->ip_summed == CHECKSUM_COMPLETE) 259 if (skb->ip_summed == CHECKSUM_COMPLETE)
240 skb->ip_summed = CHECKSUM_NONE; 260 skb->ip_summed = (cp->app && pp->csum_check) ?
261 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
241 } else { 262 } else {
242 /* full checksum calculation */ 263 /* full checksum calculation */
243 udph->check = 0; 264 udph->check = 0;
@@ -293,7 +314,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
293 skb->len - udphoff, 314 skb->len - udphoff,
294 ipv6_hdr(skb)->nexthdr, 315 ipv6_hdr(skb)->nexthdr,
295 skb->csum)) { 316 skb->csum)) {
296 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 317 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
297 "Failed checksum for"); 318 "Failed checksum for");
298 return 0; 319 return 0;
299 } 320 }
@@ -304,7 +325,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
304 skb->len - udphoff, 325 skb->len - udphoff,
305 ip_hdr(skb)->protocol, 326 ip_hdr(skb)->protocol,
306 skb->csum)) { 327 skb->csum)) {
307 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 328 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
308 "Failed checksum for"); 329 "Failed checksum for");
309 return 0; 330 return 0;
310 } 331 }
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index bbc1ac795952..076ebe00435d 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,7 +35,7 @@
35static LIST_HEAD(ip_vs_schedulers); 35static LIST_HEAD(ip_vs_schedulers);
36 36
37/* lock for service table */ 37/* lock for service table */
38static DEFINE_RWLOCK(__ip_vs_sched_lock); 38static DEFINE_SPINLOCK(ip_vs_sched_lock);
39 39
40 40
41/* 41/*
@@ -46,15 +46,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
46{ 46{
47 int ret; 47 int ret;
48 48
49 if (svc == NULL) {
50 pr_err("%s(): svc arg NULL\n", __func__);
51 return -EINVAL;
52 }
53 if (scheduler == NULL) {
54 pr_err("%s(): scheduler arg NULL\n", __func__);
55 return -EINVAL;
56 }
57
58 svc->scheduler = scheduler; 49 svc->scheduler = scheduler;
59 50
60 if (scheduler->init_service) { 51 if (scheduler->init_service) {
@@ -74,18 +65,10 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
74 */ 65 */
75int ip_vs_unbind_scheduler(struct ip_vs_service *svc) 66int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
76{ 67{
77 struct ip_vs_scheduler *sched; 68 struct ip_vs_scheduler *sched = svc->scheduler;
78 69
79 if (svc == NULL) { 70 if (!sched)
80 pr_err("%s(): svc arg NULL\n", __func__); 71 return 0;
81 return -EINVAL;
82 }
83
84 sched = svc->scheduler;
85 if (sched == NULL) {
86 pr_err("%s(): svc isn't bound\n", __func__);
87 return -EINVAL;
88 }
89 72
90 if (sched->done_service) { 73 if (sched->done_service) {
91 if (sched->done_service(svc) != 0) { 74 if (sched->done_service(svc) != 0) {
@@ -108,7 +91,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
108 91
109 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); 92 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
110 93
111 read_lock_bh(&__ip_vs_sched_lock); 94 spin_lock_bh(&ip_vs_sched_lock);
112 95
113 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 96 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
114 /* 97 /*
@@ -122,14 +105,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
122 } 105 }
123 if (strcmp(sched_name, sched->name)==0) { 106 if (strcmp(sched_name, sched->name)==0) {
124 /* HIT */ 107 /* HIT */
125 read_unlock_bh(&__ip_vs_sched_lock); 108 spin_unlock_bh(&ip_vs_sched_lock);
126 return sched; 109 return sched;
127 } 110 }
128 if (sched->module) 111 if (sched->module)
129 module_put(sched->module); 112 module_put(sched->module);
130 } 113 }
131 114
132 read_unlock_bh(&__ip_vs_sched_lock); 115 spin_unlock_bh(&ip_vs_sched_lock);
133 return NULL; 116 return NULL;
134} 117}
135 118
@@ -159,7 +142,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
159 142
160void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) 143void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
161{ 144{
162 if (scheduler->module) 145 if (scheduler && scheduler->module)
163 module_put(scheduler->module); 146 module_put(scheduler->module);
164} 147}
165 148
@@ -184,10 +167,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
184 /* increase the module use count */ 167 /* increase the module use count */
185 ip_vs_use_count_inc(); 168 ip_vs_use_count_inc();
186 169
187 write_lock_bh(&__ip_vs_sched_lock); 170 spin_lock_bh(&ip_vs_sched_lock);
188 171
189 if (!list_empty(&scheduler->n_list)) { 172 if (!list_empty(&scheduler->n_list)) {
190 write_unlock_bh(&__ip_vs_sched_lock); 173 spin_unlock_bh(&ip_vs_sched_lock);
191 ip_vs_use_count_dec(); 174 ip_vs_use_count_dec();
192 pr_err("%s(): [%s] scheduler already linked\n", 175 pr_err("%s(): [%s] scheduler already linked\n",
193 __func__, scheduler->name); 176 __func__, scheduler->name);
@@ -200,7 +183,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
200 */ 183 */
201 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 184 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
202 if (strcmp(scheduler->name, sched->name) == 0) { 185 if (strcmp(scheduler->name, sched->name) == 0) {
203 write_unlock_bh(&__ip_vs_sched_lock); 186 spin_unlock_bh(&ip_vs_sched_lock);
204 ip_vs_use_count_dec(); 187 ip_vs_use_count_dec();
205 pr_err("%s(): [%s] scheduler already existed " 188 pr_err("%s(): [%s] scheduler already existed "
206 "in the system\n", __func__, scheduler->name); 189 "in the system\n", __func__, scheduler->name);
@@ -211,7 +194,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
211 * Add it into the d-linked scheduler list 194 * Add it into the d-linked scheduler list
212 */ 195 */
213 list_add(&scheduler->n_list, &ip_vs_schedulers); 196 list_add(&scheduler->n_list, &ip_vs_schedulers);
214 write_unlock_bh(&__ip_vs_sched_lock); 197 spin_unlock_bh(&ip_vs_sched_lock);
215 198
216 pr_info("[%s] scheduler registered.\n", scheduler->name); 199 pr_info("[%s] scheduler registered.\n", scheduler->name);
217 200
@@ -229,9 +212,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
229 return -EINVAL; 212 return -EINVAL;
230 } 213 }
231 214
232 write_lock_bh(&__ip_vs_sched_lock); 215 spin_lock_bh(&ip_vs_sched_lock);
233 if (list_empty(&scheduler->n_list)) { 216 if (list_empty(&scheduler->n_list)) {
234 write_unlock_bh(&__ip_vs_sched_lock); 217 spin_unlock_bh(&ip_vs_sched_lock);
235 pr_err("%s(): [%s] scheduler is not in the list. failed\n", 218 pr_err("%s(): [%s] scheduler is not in the list. failed\n",
236 __func__, scheduler->name); 219 __func__, scheduler->name);
237 return -EINVAL; 220 return -EINVAL;
@@ -241,7 +224,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
241 * Remove it from the d-linked scheduler list 224 * Remove it from the d-linked scheduler list
242 */ 225 */
243 list_del(&scheduler->n_list); 226 list_del(&scheduler->n_list);
244 write_unlock_bh(&__ip_vs_sched_lock); 227 spin_unlock_bh(&ip_vs_sched_lock);
245 228
246 /* decrease the module use count */ 229 /* decrease the module use count */
247 ip_vs_use_count_dec(); 230 ip_vs_use_count_dec();
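
Downgrading __ip_vs_sched_lock from an rwlock to a plain spinlock is a simplification rather than a scalability change: the scheduler list is only walked on the slow configuration path (name lookup, register, unregister), so reader concurrency buys nothing and the lighter lock wins. A userspace stand-in for the resulting single-lock registration pattern, with a pthread mutex in place of spin_lock_bh():

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    struct sched {
        const char *name;
        struct sched *next;
    };

    /* One plain lock serves lookup and (un)registration alike. */
    static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct sched *schedulers;

    static int register_sched(struct sched *s)
    {
        struct sched *p;

        pthread_mutex_lock(&sched_lock);
        for (p = schedulers; p; p = p->next)
            if (!strcmp(p->name, s->name)) {
                pthread_mutex_unlock(&sched_lock);
                return -1;          /* already registered */
            }
        s->next = schedulers;
        schedulers = s;
        pthread_mutex_unlock(&sched_lock);
        return 0;
    }

    int main(void)
    {
        struct sched rr = { "rr", NULL };

        printf("first: %d, duplicate: %d\n",
               register_sched(&rr), register_sched(&rr));
        return 0;
    }
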
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 7ba06939829f..ab85aedea17e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -288,6 +288,16 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
288 ip_vs_sync_conn(cp->control); 288 ip_vs_sync_conn(cp->control);
289} 289}
290 290
291static inline int
292ip_vs_conn_fill_param_sync(int af, int protocol,
293 const union nf_inet_addr *caddr, __be16 cport,
294 const union nf_inet_addr *vaddr, __be16 vport,
295 struct ip_vs_conn_param *p)
296{
297 /* XXX: Need to take into account persistence engine */
298 ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
299 return 0;
300}
291 301
292/* 302/*
293 * Process received multicast message and create the corresponding 303 * Process received multicast message and create the corresponding
@@ -301,6 +311,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
301 struct ip_vs_conn *cp; 311 struct ip_vs_conn *cp;
302 struct ip_vs_protocol *pp; 312 struct ip_vs_protocol *pp;
303 struct ip_vs_dest *dest; 313 struct ip_vs_dest *dest;
314 struct ip_vs_conn_param param;
304 char *p; 315 char *p;
305 int i; 316 int i;
306 317
@@ -370,18 +381,20 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
370 } 381 }
371 } 382 }
372 383
373 if (!(flags & IP_VS_CONN_F_TEMPLATE)) 384 {
374 cp = ip_vs_conn_in_get(AF_INET, s->protocol, 385 if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
375 (union nf_inet_addr *)&s->caddr, 386 (union nf_inet_addr *)&s->caddr,
376 s->cport, 387 s->cport,
377 (union nf_inet_addr *)&s->vaddr, 388 (union nf_inet_addr *)&s->vaddr,
378 s->vport); 389 s->vport, &param)) {
379 else 390 pr_err("ip_vs_conn_fill_param_sync failed");
380 cp = ip_vs_ct_in_get(AF_INET, s->protocol, 391 return;
381 (union nf_inet_addr *)&s->caddr, 392 }
382 s->cport, 393 if (!(flags & IP_VS_CONN_F_TEMPLATE))
383 (union nf_inet_addr *)&s->vaddr, 394 cp = ip_vs_conn_in_get(&param);
384 s->vport); 395 else
396 cp = ip_vs_ct_in_get(&param);
397 }
385 if (!cp) { 398 if (!cp) {
386 /* 399 /*
387 * Find the appropriate destination for the connection. 400 * Find the appropriate destination for the connection.
@@ -406,14 +419,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
406 else 419 else
407 flags &= ~IP_VS_CONN_F_INACTIVE; 420 flags &= ~IP_VS_CONN_F_INACTIVE;
408 } 421 }
409 cp = ip_vs_conn_new(AF_INET, s->protocol, 422 cp = ip_vs_conn_new(&param,
410 (union nf_inet_addr *)&s->caddr,
411 s->cport,
412 (union nf_inet_addr *)&s->vaddr,
413 s->vport,
414 (union nf_inet_addr *)&s->daddr, 423 (union nf_inet_addr *)&s->daddr,
415 s->dport, 424 s->dport, flags, dest);
416 flags, dest);
417 if (dest) 425 if (dest)
418 atomic_dec(&dest->refcnt); 426 atomic_dec(&dest->refcnt);
419 if (!cp) { 427 if (!cp) {
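
The new ip_vs_conn_fill_param_sync() wrapper looks redundant today, but it concentrates construction of the connection lookup key in one place so the persistence engine flagged by the XXX comment can hook in later without touching every caller. The shape of that indirection, with illustrative types rather than the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    struct conn_param {
        int      af, protocol;
        uint32_t caddr, vaddr;
        uint16_t cport, vport;
    };

    /* Build the lookup key in exactly one place; the return code leaves
     * room to fail once persistence-engine data has to be attached. */
    static int fill_param(struct conn_param *p, int af, int protocol,
                          uint32_t caddr, uint16_t cport,
                          uint32_t vaddr, uint16_t vport)
    {
        p->af = af;
        p->protocol = protocol;
        p->caddr = caddr;
        p->cport = cport;
        p->vaddr = vaddr;
        p->vport = vport;
        return 0;
    }

    int main(void)
    {
        struct conn_param p;

        if (fill_param(&p, 2 /* AF_INET */, 6 /* IPPROTO_TCP */,
                       0x0a000001, 1025, 0x0a0000fe, 80))
            return 1;
        printf("key: %08x:%u -> %08x:%u\n", p.caddr, p.cport, p.vaddr, p.vport);
        return 0;
    }
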
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 49df6bea6a2d..de04ea39cde8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -11,6 +11,16 @@
11 * 11 *
12 * Changes: 12 * Changes:
13 * 13 *
14 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL
14 */ 24 */
15 25
16#define KMSG_COMPONENT "IPVS" 26#define KMSG_COMPONENT "IPVS"
@@ -26,9 +36,9 @@
26#include <net/route.h> /* for ip_route_output */ 36#include <net/route.h> /* for ip_route_output */
27#include <net/ipv6.h> 37#include <net/ipv6.h>
28#include <net/ip6_route.h> 38#include <net/ip6_route.h>
39#include <net/addrconf.h>
29#include <linux/icmpv6.h> 40#include <linux/icmpv6.h>
30#include <linux/netfilter.h> 41#include <linux/netfilter.h>
31#include <net/netfilter/nf_conntrack.h>
32#include <linux/netfilter_ipv4.h> 42#include <linux/netfilter_ipv4.h>
33 43
34#include <net/ip_vs.h> 44#include <net/ip_vs.h>
@@ -38,26 +48,27 @@
38 * Destination cache to speed up outgoing route lookup 48 * Destination cache to speed up outgoing route lookup
39 */ 49 */
40static inline void 50static inline void
41__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst) 51__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
52 u32 dst_cookie)
42{ 53{
43 struct dst_entry *old_dst; 54 struct dst_entry *old_dst;
44 55
45 old_dst = dest->dst_cache; 56 old_dst = dest->dst_cache;
46 dest->dst_cache = dst; 57 dest->dst_cache = dst;
47 dest->dst_rtos = rtos; 58 dest->dst_rtos = rtos;
59 dest->dst_cookie = dst_cookie;
48 dst_release(old_dst); 60 dst_release(old_dst);
49} 61}
50 62
51static inline struct dst_entry * 63static inline struct dst_entry *
52__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) 64__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
53{ 65{
54 struct dst_entry *dst = dest->dst_cache; 66 struct dst_entry *dst = dest->dst_cache;
55 67
56 if (!dst) 68 if (!dst)
57 return NULL; 69 return NULL;
58 if ((dst->obsolete 70 if ((dst->obsolete || rtos != dest->dst_rtos) &&
59 || (dest->af == AF_INET && rtos != dest->dst_rtos)) && 71 dst->ops->check(dst, dest->dst_cookie) == NULL) {
60 dst->ops->check(dst, cookie) == NULL) {
61 dest->dst_cache = NULL; 72 dest->dst_cache = NULL;
62 dst_release(dst); 73 dst_release(dst);
63 return NULL; 74 return NULL;
@@ -66,16 +77,24 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
66 return dst; 77 return dst;
67} 78}
68 79
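
dst_cookie generalizes the cache invalidation above: instead of the caller supplying a cookie at check time (previously hardwired to 0), the cookie observed at lookup time is stored with the cached route and replayed into dst->ops->check(). For IPv6 this is the fib6 node's fn_sernum, bumped whenever the tree changes. The generic cache-plus-generation pattern, as a hedged userspace sketch:

    #include <stdint.h>
    #include <stdio.h>

    struct route { int id; };

    static uint32_t route_gen = 1;      /* bumped when routing changes;
                                           plays the role of fn_sernum */

    struct dst_cache {
        struct route *rt;
        uint32_t cookie;                /* generation seen at cache time */
    };

    static struct route *cache_check(struct dst_cache *c)
    {
        if (c->rt && c->cookie == route_gen)
            return c->rt;               /* still valid, reuse */
        c->rt = NULL;                   /* stale: caller must re-lookup */
        return NULL;
    }

    int main(void)
    {
        struct route r = { 42 };
        struct dst_cache c = { &r, route_gen };

        printf("hit: %s\n", cache_check(&c) ? "yes" : "no");
        route_gen++;                    /* routing table changed */
        printf("hit: %s\n", cache_check(&c) ? "yes" : "no");
        return 0;
    }
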
80/*
81 * Get route to destination or remote server
82 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
83 * &4=Allow redirect from remote daddr to local
84 */
69static struct rtable * 85static struct rtable *
70__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) 86__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
87 __be32 daddr, u32 rtos, int rt_mode)
71{ 88{
89 struct net *net = dev_net(skb_dst(skb)->dev);
72 struct rtable *rt; /* Route to the other host */ 90 struct rtable *rt; /* Route to the other host */
73 struct ip_vs_dest *dest = cp->dest; 91 struct rtable *ort; /* Original route */
92 int local;
74 93
75 if (dest) { 94 if (dest) {
76 spin_lock(&dest->dst_lock); 95 spin_lock(&dest->dst_lock);
77 if (!(rt = (struct rtable *) 96 if (!(rt = (struct rtable *)
78 __ip_vs_dst_check(dest, rtos, 0))) { 97 __ip_vs_dst_check(dest, rtos))) {
79 struct flowi fl = { 98 struct flowi fl = {
80 .oif = 0, 99 .oif = 0,
81 .nl_u = { 100 .nl_u = {
@@ -85,13 +104,13 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
85 .tos = rtos, } }, 104 .tos = rtos, } },
86 }; 105 };
87 106
88 if (ip_route_output_key(&init_net, &rt, &fl)) { 107 if (ip_route_output_key(net, &rt, &fl)) {
89 spin_unlock(&dest->dst_lock); 108 spin_unlock(&dest->dst_lock);
90 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 109 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
91 &dest->addr.ip); 110 &dest->addr.ip);
92 return NULL; 111 return NULL;
93 } 112 }
94 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); 113 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
95 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", 114 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
96 &dest->addr.ip, 115 &dest->addr.ip,
97 atomic_read(&rt->dst.__refcnt), rtos); 116 atomic_read(&rt->dst.__refcnt), rtos);
@@ -102,78 +121,199 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
102 .oif = 0, 121 .oif = 0,
103 .nl_u = { 122 .nl_u = {
104 .ip4_u = { 123 .ip4_u = {
105 .daddr = cp->daddr.ip, 124 .daddr = daddr,
106 .saddr = 0, 125 .saddr = 0,
107 .tos = rtos, } }, 126 .tos = rtos, } },
108 }; 127 };
109 128
110 if (ip_route_output_key(&init_net, &rt, &fl)) { 129 if (ip_route_output_key(net, &rt, &fl)) {
111 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 130 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
112 &cp->daddr.ip); 131 &daddr);
113 return NULL; 132 return NULL;
114 } 133 }
115 } 134 }
116 135
136 local = rt->rt_flags & RTCF_LOCAL;
137 if (!((local ? 1 : 2) & rt_mode)) {
138 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
139 (rt->rt_flags & RTCF_LOCAL) ?
140 "local":"non-local", &rt->rt_dst);
141 ip_rt_put(rt);
142 return NULL;
143 }
144 if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) &&
145 ort->rt_flags & RTCF_LOCAL)) {
146 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
147 "requires NAT method, dest: %pI4\n",
148 &ip_hdr(skb)->daddr, &rt->rt_dst);
149 ip_rt_put(rt);
150 return NULL;
151 }
152 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
153 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
154 "to non-local address, dest: %pI4\n",
155 &ip_hdr(skb)->saddr, &rt->rt_dst);
156 ip_rt_put(rt);
157 return NULL;
158 }
159
117 return rt; 160 return rt;
118} 161}
119 162
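
rt_mode packs the caller's routing policy into three bits, and the test (local ? 1 : 2) & rt_mode asks whether the kind of destination the lookup resolved to is permitted: in the hunks below, NAT passes 1|2|4, DR and tunnel pass 1|2, and bypass passes 2. Spelled out with named flags (the defines are illustrative; the diff uses the raw constants):

    #include <stdio.h>

    #define RT_MODE_LOCAL       1   /* allow local destinations */
    #define RT_MODE_NON_LOCAL   2   /* allow non-local destinations */
    #define RT_MODE_REDIRECT    4   /* allow remote daddr -> local redirect */

    static int route_allowed(int local, int rt_mode)
    {
        return ((local ? RT_MODE_LOCAL : RT_MODE_NON_LOCAL) & rt_mode) != 0;
    }

    int main(void)
    {
        /* bypass passes 2: a route that resolves to a local address is
         * refused; NAT passes 1|2|4 and accepts it. */
        printf("bypass, local dest: %d\n", route_allowed(1, RT_MODE_NON_LOCAL));
        printf("NAT, local dest:    %d\n",
               route_allowed(1, RT_MODE_LOCAL | RT_MODE_NON_LOCAL |
                                RT_MODE_REDIRECT));
        return 0;
    }
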
163/* Reroute packet to local IPv4 stack after DNAT */
164static int
165__ip_vs_reroute_locally(struct sk_buff *skb)
166{
167 struct rtable *rt = skb_rtable(skb);
168 struct net_device *dev = rt->dst.dev;
169 struct net *net = dev_net(dev);
170 struct iphdr *iph = ip_hdr(skb);
171
172 if (rt->fl.iif) {
173 unsigned long orefdst = skb->_skb_refdst;
174
175 if (ip_route_input(skb, iph->daddr, iph->saddr,
176 iph->tos, skb->dev))
177 return 0;
178 refdst_drop(orefdst);
179 } else {
180 struct flowi fl = {
181 .oif = 0,
182 .nl_u = {
183 .ip4_u = {
184 .daddr = iph->daddr,
185 .saddr = iph->saddr,
186 .tos = RT_TOS(iph->tos),
187 }
188 },
189 .mark = skb->mark,
190 };
191 struct rtable *rt;
192
193 if (ip_route_output_key(net, &rt, &fl))
194 return 0;
195 if (!(rt->rt_flags & RTCF_LOCAL)) {
196 ip_rt_put(rt);
197 return 0;
198 }
199 /* Drop old route. */
200 skb_dst_drop(skb);
201 skb_dst_set(skb, &rt->dst);
202 }
203 return 1;
204}
205
120#ifdef CONFIG_IP_VS_IPV6 206#ifdef CONFIG_IP_VS_IPV6
207
208static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
209{
210 return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
211}
212
213static struct dst_entry *
214__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
215 struct in6_addr *ret_saddr, int do_xfrm)
216{
217 struct dst_entry *dst;
218 struct flowi fl = {
219 .oif = 0,
220 .nl_u = {
221 .ip6_u = {
222 .daddr = *daddr,
223 },
224 },
225 };
226
227 dst = ip6_route_output(net, NULL, &fl);
228 if (dst->error)
229 goto out_err;
230 if (!ret_saddr)
231 return dst;
232 if (ipv6_addr_any(&fl.fl6_src) &&
233 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
234 &fl.fl6_dst, 0, &fl.fl6_src) < 0)
235 goto out_err;
236 if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
237 goto out_err;
238 ipv6_addr_copy(ret_saddr, &fl.fl6_src);
239 return dst;
240
241out_err:
242 dst_release(dst);
243 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
244 return NULL;
245}
246
247/*
248 * Get route to destination or remote server
249 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
250 * &4=Allow redirect from remote daddr to local
251 */
121static struct rt6_info * 252static struct rt6_info *
122__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) 253__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
254 struct in6_addr *daddr, struct in6_addr *ret_saddr,
255 int do_xfrm, int rt_mode)
123{ 256{
257 struct net *net = dev_net(skb_dst(skb)->dev);
124 struct rt6_info *rt; /* Route to the other host */ 258 struct rt6_info *rt; /* Route to the other host */
125 struct ip_vs_dest *dest = cp->dest; 259 struct rt6_info *ort; /* Original route */
260 struct dst_entry *dst;
261 int local;
126 262
127 if (dest) { 263 if (dest) {
128 spin_lock(&dest->dst_lock); 264 spin_lock(&dest->dst_lock);
129 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); 265 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
130 if (!rt) { 266 if (!rt) {
131 struct flowi fl = { 267 u32 cookie;
132 .oif = 0,
133 .nl_u = {
134 .ip6_u = {
135 .daddr = dest->addr.in6,
136 .saddr = {
137 .s6_addr32 =
138 { 0, 0, 0, 0 },
139 },
140 },
141 },
142 };
143 268
144 rt = (struct rt6_info *)ip6_route_output(&init_net, 269 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
145 NULL, &fl); 270 &dest->dst_saddr,
146 if (!rt) { 271 do_xfrm);
272 if (!dst) {
147 spin_unlock(&dest->dst_lock); 273 spin_unlock(&dest->dst_lock);
148 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
149 &dest->addr.in6);
150 return NULL; 274 return NULL;
151 } 275 }
152 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); 276 rt = (struct rt6_info *) dst;
153 IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", 277 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
154 &dest->addr.in6, 278 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
279 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
280 &dest->addr.in6, &dest->dst_saddr,
155 atomic_read(&rt->dst.__refcnt)); 281 atomic_read(&rt->dst.__refcnt));
156 } 282 }
283 if (ret_saddr)
284 ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
157 spin_unlock(&dest->dst_lock); 285 spin_unlock(&dest->dst_lock);
158 } else { 286 } else {
159 struct flowi fl = { 287 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
160 .oif = 0, 288 if (!dst)
161 .nl_u = {
162 .ip6_u = {
163 .daddr = cp->daddr.in6,
164 .saddr = {
165 .s6_addr32 = { 0, 0, 0, 0 },
166 },
167 },
168 },
169 };
170
171 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
172 if (!rt) {
173 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
174 &cp->daddr.in6);
175 return NULL; 289 return NULL;
176 } 290 rt = (struct rt6_info *) dst;
291 }
292
293 local = __ip_vs_is_local_route6(rt);
294 if (!((local ? 1 : 2) & rt_mode)) {
295 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
296 local ? "local":"non-local", daddr);
297 dst_release(&rt->dst);
298 return NULL;
299 }
300 if (local && !(rt_mode & 4) &&
301 !((ort = (struct rt6_info *) skb_dst(skb)) &&
302 __ip_vs_is_local_route6(ort))) {
303 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
304 "requires NAT method, dest: %pI6\n",
305 &ipv6_hdr(skb)->daddr, daddr);
306 dst_release(&rt->dst);
307 return NULL;
308 }
309 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
310 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
311 IPV6_ADDR_LOOPBACK)) {
312 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
313 "to non-local address, dest: %pI6\n",
314 &ipv6_hdr(skb)->saddr, daddr);
315 dst_release(&rt->dst);
316 return NULL;
177 } 317 }
178 318
179 return rt; 319 return rt;
@@ -194,12 +334,44 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
194 dst_release(old_dst); 334 dst_release(old_dst);
195} 335}
196 336
197#define IP_VS_XMIT(pf, skb, rt) \ 337#define IP_VS_XMIT_TUNNEL(skb, cp) \
338({ \
339 int __ret = NF_ACCEPT; \
340 \
341 (skb)->ipvs_property = 1; \
342 if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \
343 __ret = ip_vs_confirm_conntrack(skb, cp); \
344 if (__ret == NF_ACCEPT) { \
345 nf_reset(skb); \
346 skb_forward_csum(skb); \
347 } \
348 __ret; \
349})
350
351#define IP_VS_XMIT_NAT(pf, skb, cp, local) \
352do { \
353 (skb)->ipvs_property = 1; \
354 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
355 ip_vs_notrack(skb); \
356 else \
357 ip_vs_update_conntrack(skb, cp, 1); \
358 if (local) \
359 return NF_ACCEPT; \
360 skb_forward_csum(skb); \
361 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
362 skb_dst(skb)->dev, dst_output); \
363} while (0)
364
365#define IP_VS_XMIT(pf, skb, cp, local) \
198do { \ 366do { \
199 (skb)->ipvs_property = 1; \ 367 (skb)->ipvs_property = 1; \
368 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
369 ip_vs_notrack(skb); \
370 if (local) \
371 return NF_ACCEPT; \
200 skb_forward_csum(skb); \ 372 skb_forward_csum(skb); \
201 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 373 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
202 (rt)->dst.dev, dst_output); \ 374 skb_dst(skb)->dev, dst_output); \
203} while (0) 375} while (0)
204 376
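
IP_VS_XMIT_TUNNEL differs from its do/while siblings by being a GNU C statement expression, ({ ... }), whose value is that of its last statement, so the macro can both perform the conntrack confirmation and hand back a verdict for the caller's NF_ACCEPT/NF_DROP handling. The extension in isolation:

    #include <stdio.h>

    /* A statement expression runs its block and evaluates to the value
     * of the last expression statement -- a GCC/Clang extension, the
     * same device IP_VS_XMIT_TUNNEL uses to return a verdict. */
    #define CLAMP_NONNEG_DOUBLED(x)        \
    ({                                     \
        int __v = (x);                     \
        if (__v < 0)                       \
            __v = 0;                       \
        __v * 2;                           \
    })

    int main(void)
    {
        printf("%d %d\n", CLAMP_NONNEG_DOUBLED(21),
               CLAMP_NONNEG_DOUBLED(-5));   /* prints "42 0" */
        return 0;
    }
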
205 377
@@ -211,7 +383,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
211 struct ip_vs_protocol *pp) 383 struct ip_vs_protocol *pp)
212{ 384{
213 /* we do not touch skb and do not need pskb ptr */ 385 /* we do not touch skb and do not need pskb ptr */
214 return NF_ACCEPT; 386 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
215} 387}
216 388
217 389
@@ -226,24 +398,13 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
226{ 398{
227 struct rtable *rt; /* Route to the other host */ 399 struct rtable *rt; /* Route to the other host */
228 struct iphdr *iph = ip_hdr(skb); 400 struct iphdr *iph = ip_hdr(skb);
229 u8 tos = iph->tos;
230 int mtu; 401 int mtu;
231 struct flowi fl = {
232 .oif = 0,
233 .nl_u = {
234 .ip4_u = {
235 .daddr = iph->daddr,
236 .saddr = 0,
237 .tos = RT_TOS(tos), } },
238 };
239 402
240 EnterFunction(10); 403 EnterFunction(10);
241 404
242 if (ip_route_output_key(&init_net, &rt, &fl)) { 405 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr,
243 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", 406 RT_TOS(iph->tos), 2)))
244 __func__, &iph->daddr);
245 goto tx_error_icmp; 407 goto tx_error_icmp;
246 }
247 408
248 /* MTU checking */ 409 /* MTU checking */
249 mtu = dst_mtu(&rt->dst); 410 mtu = dst_mtu(&rt->dst);
@@ -271,7 +432,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
271 /* Another hack: avoid icmp_send in ip_fragment */ 432 /* Another hack: avoid icmp_send in ip_fragment */
272 skb->local_df = 1; 433 skb->local_df = 1;
273 434
274 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 435 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
275 436
276 LeaveFunction(10); 437 LeaveFunction(10);
277 return NF_STOLEN; 438 return NF_STOLEN;
@@ -292,28 +453,22 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
292 struct rt6_info *rt; /* Route to the other host */ 453 struct rt6_info *rt; /* Route to the other host */
293 struct ipv6hdr *iph = ipv6_hdr(skb); 454 struct ipv6hdr *iph = ipv6_hdr(skb);
294 int mtu; 455 int mtu;
295 struct flowi fl = {
296 .oif = 0,
297 .nl_u = {
298 .ip6_u = {
299 .daddr = iph->daddr,
300 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
301 };
302 456
303 EnterFunction(10); 457 EnterFunction(10);
304 458
305 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 459 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2)))
306 if (!rt) {
307 IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
308 __func__, &iph->daddr);
309 goto tx_error_icmp; 460 goto tx_error_icmp;
310 }
311 461
312 /* MTU checking */ 462 /* MTU checking */
313 mtu = dst_mtu(&rt->dst); 463 mtu = dst_mtu(&rt->dst);
314 if (skb->len > mtu) { 464 if (skb->len > mtu) {
315 dst_release(&rt->dst); 465 if (!skb->dev) {
466 struct net *net = dev_net(skb_dst(skb)->dev);
467
468 skb->dev = net->loopback_dev;
469 }
316 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 470 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
471 dst_release(&rt->dst);
317 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 472 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
318 goto tx_error; 473 goto tx_error;
319 } 474 }
@@ -335,7 +490,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
335 /* Another hack: avoid icmp_send in ip_fragment */ 490 /* Another hack: avoid icmp_send in ip_fragment */
336 skb->local_df = 1; 491 skb->local_df = 1;
337 492
338 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 493 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
339 494
340 LeaveFunction(10); 495 LeaveFunction(10);
341 return NF_STOLEN; 496 return NF_STOLEN;
@@ -349,36 +504,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
349} 504}
350#endif 505#endif
351 506
352void
353ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
354{
355 struct nf_conn *ct = (struct nf_conn *)skb->nfct;
356 struct nf_conntrack_tuple new_tuple;
357
358 if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
359 return;
360
361 /*
362 * The connection is not yet in the hashtable, so we update it.
363 * CIP->VIP will remain the same, so leave the tuple in
364 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
365 * real-server we will see RIP->DIP.
366 */
367 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
368 if (outin)
369 new_tuple.src.u3 = cp->daddr;
370 else
371 new_tuple.dst.u3 = cp->vaddr;
372 /*
373 * This will also take care of UDP and other protocols.
374 */
375 if (outin)
376 new_tuple.src.u.tcp.port = cp->dport;
377 else
378 new_tuple.dst.u.tcp.port = cp->vport;
379 nf_conntrack_alter_reply(ct, &new_tuple);
380}
381
382/* 507/*
383 * NAT transmitter (only for outside-to-inside nat forwarding) 508 * NAT transmitter (only for outside-to-inside nat forwarding)
384 * Not used for related ICMP 509 * Not used for related ICMP
@@ -390,6 +515,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
390 struct rtable *rt; /* Route to the other host */ 515 struct rtable *rt; /* Route to the other host */
391 int mtu; 516 int mtu;
392 struct iphdr *iph = ip_hdr(skb); 517 struct iphdr *iph = ip_hdr(skb);
518 int local;
393 519
394 EnterFunction(10); 520 EnterFunction(10);
395 521
@@ -403,16 +529,42 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
403 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 529 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
404 } 530 }
405 531
406 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) 532 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
533 RT_TOS(iph->tos), 1|2|4)))
407 goto tx_error_icmp; 534 goto tx_error_icmp;
535 local = rt->rt_flags & RTCF_LOCAL;
536 /*
537 * Avoid duplicate tuple in reply direction for NAT traffic
538 * to local address when connection is sync-ed
539 */
540#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
541 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
542 enum ip_conntrack_info ctinfo;
543 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
544
545 if (ct && !nf_ct_is_untracked(ct)) {
546 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
547 "ip_vs_nat_xmit(): "
548 "stopping DNAT to local address");
549 goto tx_error_put;
550 }
551 }
552#endif
553
554 /* From world but DNAT to loopback address? */
555 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
556 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
557 "stopping DNAT to loopback address");
558 goto tx_error_put;
559 }
408 560
409 /* MTU checking */ 561 /* MTU checking */
410 mtu = dst_mtu(&rt->dst); 562 mtu = dst_mtu(&rt->dst);
411 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 563 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
412 ip_rt_put(rt);
413 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 564 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
414 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); 565 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
415 goto tx_error; 566 "ip_vs_nat_xmit(): frag needed for");
567 goto tx_error_put;
416 } 568 }
417 569
418 /* copy-on-write the packet before mangling it */ 570 /* copy-on-write the packet before mangling it */
@@ -422,19 +574,28 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
422 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 574 if (skb_cow(skb, rt->dst.dev->hard_header_len))
423 goto tx_error_put; 575 goto tx_error_put;
424 576
425 /* drop old route */
426 skb_dst_drop(skb);
427 skb_dst_set(skb, &rt->dst);
428
429 /* mangle the packet */ 577 /* mangle the packet */
430 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 578 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
431 goto tx_error; 579 goto tx_error_put;
432 ip_hdr(skb)->daddr = cp->daddr.ip; 580 ip_hdr(skb)->daddr = cp->daddr.ip;
433 ip_send_check(ip_hdr(skb)); 581 ip_send_check(ip_hdr(skb));
434 582
435 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 583 if (!local) {
584 /* drop old route */
585 skb_dst_drop(skb);
586 skb_dst_set(skb, &rt->dst);
587 } else {
588 ip_rt_put(rt);
589 /*
590 * Some IPv4 replies get local address from routes,
591 * not from iph, so while we DNAT after routing
592 * we need this second input/output route.
593 */
594 if (!__ip_vs_reroute_locally(skb))
595 goto tx_error;
596 }
436 597
437 ip_vs_update_conntrack(skb, cp, 1); 598 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
438 599
439 /* FIXME: when application helper enlarges the packet and the length 600 /* FIXME: when application helper enlarges the packet and the length
440 is larger than the MTU of outgoing device, there will be still 601 is larger than the MTU of outgoing device, there will be still
@@ -443,7 +604,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
443 /* Another hack: avoid icmp_send in ip_fragment */ 604 /* Another hack: avoid icmp_send in ip_fragment */
444 skb->local_df = 1; 605 skb->local_df = 1;
445 606
446 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 607 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
447 608
448 LeaveFunction(10); 609 LeaveFunction(10);
449 return NF_STOLEN; 610 return NF_STOLEN;
@@ -451,8 +612,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
451 tx_error_icmp: 612 tx_error_icmp:
452 dst_link_failure(skb); 613 dst_link_failure(skb);
453 tx_error: 614 tx_error:
454 LeaveFunction(10);
455 kfree_skb(skb); 615 kfree_skb(skb);
616 LeaveFunction(10);
456 return NF_STOLEN; 617 return NF_STOLEN;
457 tx_error_put: 618 tx_error_put:
458 ip_rt_put(rt); 619 ip_rt_put(rt);
@@ -466,6 +627,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
466{ 627{
467 struct rt6_info *rt; /* Route to the other host */ 628 struct rt6_info *rt; /* Route to the other host */
468 int mtu; 629 int mtu;
630 int local;
469 631
470 EnterFunction(10); 632 EnterFunction(10);
471 633
@@ -480,18 +642,49 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
480 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 642 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
481 } 643 }
482 644
483 rt = __ip_vs_get_out_rt_v6(cp); 645 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
484 if (!rt) 646 0, 1|2|4)))
485 goto tx_error_icmp; 647 goto tx_error_icmp;
648 local = __ip_vs_is_local_route6(rt);
649 /*
650 * Avoid duplicate tuple in reply direction for NAT traffic
651 * to local address when connection is sync-ed
652 */
653#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
654 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
655 enum ip_conntrack_info ctinfo;
656 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
657
658 if (ct && !nf_ct_is_untracked(ct)) {
659 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
660 "ip_vs_nat_xmit_v6(): "
661 "stopping DNAT to local address");
662 goto tx_error_put;
663 }
664 }
665#endif
666
667 /* From world but DNAT to loopback address? */
668 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
669 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
670 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
671 "ip_vs_nat_xmit_v6(): "
672 "stopping DNAT to loopback address");
673 goto tx_error_put;
674 }
486 675
487 /* MTU checking */ 676 /* MTU checking */
488 mtu = dst_mtu(&rt->dst); 677 mtu = dst_mtu(&rt->dst);
489 if (skb->len > mtu) { 678 if (skb->len > mtu) {
490 dst_release(&rt->dst); 679 if (!skb->dev) {
680 struct net *net = dev_net(skb_dst(skb)->dev);
681
682 skb->dev = net->loopback_dev;
683 }
491 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 684 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
492 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 685 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
493 "ip_vs_nat_xmit_v6(): frag needed for"); 686 "ip_vs_nat_xmit_v6(): frag needed for");
494 goto tx_error; 687 goto tx_error_put;
495 } 688 }
496 689
497 /* copy-on-write the packet before mangling it */ 690 /* copy-on-write the packet before mangling it */
@@ -501,18 +694,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
501 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 694 if (skb_cow(skb, rt->dst.dev->hard_header_len))
502 goto tx_error_put; 695 goto tx_error_put;
503 696
504 /* drop old route */
505 skb_dst_drop(skb);
506 skb_dst_set(skb, &rt->dst);
507
508 /* mangle the packet */ 697 /* mangle the packet */
509 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 698 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
510 goto tx_error; 699 goto tx_error;
511 ipv6_hdr(skb)->daddr = cp->daddr.in6; 700 ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
512 701
513 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 702 if (!local || !skb->dev) {
703 /* drop the old route when skb is not shared */
704 skb_dst_drop(skb);
705 skb_dst_set(skb, &rt->dst);
706 } else {
707 /* destined to loopback, do we need to change route? */
708 dst_release(&rt->dst);
709 }
514 710
515 ip_vs_update_conntrack(skb, cp, 1); 711 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
516 712
517 /* FIXME: when application helper enlarges the packet and the length 713 /* FIXME: when application helper enlarges the packet and the length
518 is larger than the MTU of outgoing device, there will be still 714 is larger than the MTU of outgoing device, there will be still
@@ -521,7 +717,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
521 /* Another hack: avoid icmp_send in ip_fragment */ 717 /* Another hack: avoid icmp_send in ip_fragment */
522 skb->local_df = 1; 718 skb->local_df = 1;
523 719
524 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 720 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
525 721
526 LeaveFunction(10); 722 LeaveFunction(10);
527 return NF_STOLEN; 723 return NF_STOLEN;
@@ -567,30 +763,27 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
567 struct iphdr *old_iph = ip_hdr(skb); 763 struct iphdr *old_iph = ip_hdr(skb);
568 u8 tos = old_iph->tos; 764 u8 tos = old_iph->tos;
569 __be16 df = old_iph->frag_off; 765 __be16 df = old_iph->frag_off;
570 sk_buff_data_t old_transport_header = skb->transport_header;
571 struct iphdr *iph; /* Our new IP header */ 766 struct iphdr *iph; /* Our new IP header */
572 unsigned int max_headroom; /* The extra header space needed */ 767 unsigned int max_headroom; /* The extra header space needed */
573 int mtu; 768 int mtu;
769 int ret;
574 770
575 EnterFunction(10); 771 EnterFunction(10);
576 772
577 if (skb->protocol != htons(ETH_P_IP)) { 773 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
578 IP_VS_DBG_RL("%s(): protocol error, " 774 RT_TOS(tos), 1|2)))
579 "ETH_P_IP: %d, skb protocol: %d\n",
580 __func__, htons(ETH_P_IP), skb->protocol);
581 goto tx_error;
582 }
583
584 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
585 goto tx_error_icmp; 775 goto tx_error_icmp;
776 if (rt->rt_flags & RTCF_LOCAL) {
777 ip_rt_put(rt);
778 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
779 }
586 780
587 tdev = rt->dst.dev; 781 tdev = rt->dst.dev;
588 782
589 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 783 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
590 if (mtu < 68) { 784 if (mtu < 68) {
591 ip_rt_put(rt);
592 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 785 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
593 goto tx_error; 786 goto tx_error_put;
594 } 787 }
595 if (skb_dst(skb)) 788 if (skb_dst(skb))
596 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 789 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
@@ -600,9 +793,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
600 if ((old_iph->frag_off & htons(IP_DF)) 793 if ((old_iph->frag_off & htons(IP_DF))
601 && mtu < ntohs(old_iph->tot_len)) { 794 && mtu < ntohs(old_iph->tot_len)) {
602 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 795 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
603 ip_rt_put(rt);
604 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 796 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
605 goto tx_error; 797 goto tx_error_put;
606 } 798 }
607 799
608 /* 800 /*
@@ -625,7 +817,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
625 old_iph = ip_hdr(skb); 817 old_iph = ip_hdr(skb);
626 } 818 }
627 819
628 skb->transport_header = old_transport_header; 820 skb->transport_header = skb->network_header;
629 821
630 /* fix old IP header checksum */ 822 /* fix old IP header checksum */
631 ip_send_check(old_iph); 823 ip_send_check(old_iph);
@@ -655,7 +847,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
655 /* Another hack: avoid icmp_send in ip_fragment */ 847 /* Another hack: avoid icmp_send in ip_fragment */
656 skb->local_df = 1; 848 skb->local_df = 1;
657 849
658 ip_local_out(skb); 850 ret = IP_VS_XMIT_TUNNEL(skb, cp);
851 if (ret == NF_ACCEPT)
852 ip_local_out(skb);
853 else if (ret == NF_DROP)
854 kfree_skb(skb);
659 855
660 LeaveFunction(10); 856 LeaveFunction(10);
661 857
@@ -667,6 +863,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
667 kfree_skb(skb); 863 kfree_skb(skb);
668 LeaveFunction(10); 864 LeaveFunction(10);
669 return NF_STOLEN; 865 return NF_STOLEN;
866tx_error_put:
867 ip_rt_put(rt);
868 goto tx_error;
670} 869}
671 870
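
What ip_vs_tunnel_xmit() ultimately emits is an IPIP datagram: a fresh outer IPv4 header with protocol 4 prepended to the untouched inner packet, with the outer checksum filled by ip_send_check(). A userspace approximation of that framing (placeholder addresses; checksum left zero where the kernel would compute it):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Prepend an outer IPv4 header carrying IPPROTO_IPIP (4). */
    static size_t ipip_encap(uint8_t *out, const uint8_t *inner, size_t ilen,
                             uint32_t saddr, uint32_t daddr)
    {
        struct iphdr *iph = (struct iphdr *)out;

        memset(iph, 0, sizeof(*iph));
        iph->version  = 4;
        iph->ihl      = sizeof(*iph) / 4;
        iph->tot_len  = htons((uint16_t)(sizeof(*iph) + ilen));
        iph->ttl      = 64;
        iph->protocol = IPPROTO_IPIP;
        iph->saddr    = saddr;
        iph->daddr    = daddr;
        /* checksum left 0; the kernel fills it via ip_send_check() */
        memcpy(out + sizeof(*iph), inner, ilen);
        return sizeof(*iph) + ilen;
    }

    int main(void)
    {
        uint8_t inner[40] = { 0x45 };   /* pretend inner IPv4 packet */
        uint8_t out[128];
        size_t n = ipip_encap(out, inner, sizeof(inner),
                              htonl(0x0a000001), htonl(0x0a0000fe));

        printf("outer+inner = %zu bytes\n", n);
        return 0;
    }
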
672#ifdef CONFIG_IP_VS_IPV6 871#ifdef CONFIG_IP_VS_IPV6
@@ -675,43 +874,44 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
675 struct ip_vs_protocol *pp) 874 struct ip_vs_protocol *pp)
676{ 875{
677 struct rt6_info *rt; /* Route to the other host */ 876 struct rt6_info *rt; /* Route to the other host */
877 struct in6_addr saddr; /* Source for tunnel */
678 struct net_device *tdev; /* Device to other host */ 878 struct net_device *tdev; /* Device to other host */
679 struct ipv6hdr *old_iph = ipv6_hdr(skb); 879 struct ipv6hdr *old_iph = ipv6_hdr(skb);
680 sk_buff_data_t old_transport_header = skb->transport_header;
681 struct ipv6hdr *iph; /* Our new IP header */ 880 struct ipv6hdr *iph; /* Our new IP header */
682 unsigned int max_headroom; /* The extra header space needed */ 881 unsigned int max_headroom; /* The extra header space needed */
683 int mtu; 882 int mtu;
883 int ret;
684 884
685 EnterFunction(10); 885 EnterFunction(10);
686 886
687 if (skb->protocol != htons(ETH_P_IPV6)) { 887 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
688 IP_VS_DBG_RL("%s(): protocol error, " 888 &saddr, 1, 1|2)))
689 "ETH_P_IPV6: %d, skb protocol: %d\n",
690 __func__, htons(ETH_P_IPV6), skb->protocol);
691 goto tx_error;
692 }
693
694 rt = __ip_vs_get_out_rt_v6(cp);
695 if (!rt)
696 goto tx_error_icmp; 889 goto tx_error_icmp;
890 if (__ip_vs_is_local_route6(rt)) {
891 dst_release(&rt->dst);
892 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
893 }
697 894
698 tdev = rt->dst.dev; 895 tdev = rt->dst.dev;
699 896
700 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 897 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
701 /* TODO IPv6: do we need this check in IPv6? */ 898 if (mtu < IPV6_MIN_MTU) {
702 if (mtu < 1280) { 899 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
703 dst_release(&rt->dst); 900 IPV6_MIN_MTU);
704 IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); 901 goto tx_error_put;
705 goto tx_error;
706 } 902 }
707 if (skb_dst(skb)) 903 if (skb_dst(skb))
708 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 904 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
709 905
710 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 906 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
907 if (!skb->dev) {
908 struct net *net = dev_net(skb_dst(skb)->dev);
909
910 skb->dev = net->loopback_dev;
911 }
711 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 912 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
712 dst_release(&rt->dst);
713 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 913 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
714 goto tx_error; 914 goto tx_error_put;
715 } 915 }
716 916
717 /* 917 /*
@@ -734,7 +934,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
734 old_iph = ipv6_hdr(skb); 934 old_iph = ipv6_hdr(skb);
735 } 935 }
736 936
737 skb->transport_header = old_transport_header; 937 skb->transport_header = skb->network_header;
738 938
739 skb_push(skb, sizeof(struct ipv6hdr)); 939 skb_push(skb, sizeof(struct ipv6hdr));
740 skb_reset_network_header(skb); 940 skb_reset_network_header(skb);
@@ -754,14 +954,18 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
754 be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); 954 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
755 iph->priority = old_iph->priority; 955 iph->priority = old_iph->priority;
756 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); 956 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
757 iph->daddr = rt->rt6i_dst.addr; 957 ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
758 iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ 958 ipv6_addr_copy(&iph->saddr, &saddr);
759 iph->hop_limit = old_iph->hop_limit; 959 iph->hop_limit = old_iph->hop_limit;
760 960
761 /* Another hack: avoid icmp_send in ip_fragment */ 961 /* Another hack: avoid icmp_send in ip_fragment */
762 skb->local_df = 1; 962 skb->local_df = 1;
763 963
764 ip6_local_out(skb); 964 ret = IP_VS_XMIT_TUNNEL(skb, cp);
965 if (ret == NF_ACCEPT)
966 ip6_local_out(skb);
967 else if (ret == NF_DROP)
968 kfree_skb(skb);
765 969
766 LeaveFunction(10); 970 LeaveFunction(10);
767 971
@@ -773,6 +977,9 @@ tx_error:
773 kfree_skb(skb); 977 kfree_skb(skb);
774 LeaveFunction(10); 978 LeaveFunction(10);
775 return NF_STOLEN; 979 return NF_STOLEN;
980tx_error_put:
981 dst_release(&rt->dst);
982 goto tx_error;
776} 983}
777#endif 984#endif
778 985
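Editor's note: the new tx_error_put label gives every failure that occurs while the route reference is still held a single exit that drops it before joining the shared tx_error path, so the reference is released exactly once. A sketch of the same goto-unwind shape in plain C, with illustrative resource names:

/* Sketch of the tx_error/tx_error_put unwind shape; the resource
 * names here are illustrative, not the kernel's. */
#include <stdio.h>
#include <stdlib.h>

static void *route_lookup(void) { return malloc(1); }
static void route_put(void *rt) { free(rt); }

static int xmit(int fail_early, int fail_late)
{
	void *rt;

	if (fail_early)
		goto tx_error;		/* nothing to release yet */

	rt = route_lookup();
	if (!rt)
		goto tx_error;

	if (fail_late)
		goto tx_error_put;	/* route held: release it first */

	route_put(rt);			/* success path */
	return 0;

tx_error_put:
	route_put(rt);
tx_error:
	fprintf(stderr, "xmit failed\n");
	return -1;
}

int main(void) { return xmit(0, 1) == -1 ? 0 : 1; }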
@@ -791,8 +998,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
791 998
792 EnterFunction(10); 999 EnterFunction(10);
793 1000
794 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) 1001 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1002 RT_TOS(iph->tos), 1|2)))
795 goto tx_error_icmp; 1003 goto tx_error_icmp;
1004 if (rt->rt_flags & RTCF_LOCAL) {
1005 ip_rt_put(rt);
1006 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1007 }
796 1008
797 /* MTU checking */ 1009 /* MTU checking */
798 mtu = dst_mtu(&rt->dst); 1010 mtu = dst_mtu(&rt->dst);
@@ -820,7 +1032,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
820 /* Another hack: avoid icmp_send in ip_fragment */ 1032 /* Another hack: avoid icmp_send in ip_fragment */
821 skb->local_df = 1; 1033 skb->local_df = 1;
822 1034
823 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 1035 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
824 1036
825 LeaveFunction(10); 1037 LeaveFunction(10);
826 return NF_STOLEN; 1038 return NF_STOLEN;
@@ -843,13 +1055,22 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
843 1055
844 EnterFunction(10); 1056 EnterFunction(10);
845 1057
846 rt = __ip_vs_get_out_rt_v6(cp); 1058 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
847 if (!rt) 1059 0, 1|2)))
848 goto tx_error_icmp; 1060 goto tx_error_icmp;
1061 if (__ip_vs_is_local_route6(rt)) {
1062 dst_release(&rt->dst);
1063 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1064 }
849 1065
850 /* MTU checking */ 1066 /* MTU checking */
851 mtu = dst_mtu(&rt->dst); 1067 mtu = dst_mtu(&rt->dst);
852 if (skb->len > mtu) { 1068 if (skb->len > mtu) {
1069 if (!skb->dev) {
1070 struct net *net = dev_net(skb_dst(skb)->dev);
1071
1072 skb->dev = net->loopback_dev;
1073 }
853 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1074 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
854 dst_release(&rt->dst); 1075 dst_release(&rt->dst);
855 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1076 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -873,7 +1094,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
873 /* Another hack: avoid icmp_send in ip_fragment */ 1094 /* Another hack: avoid icmp_send in ip_fragment */
874 skb->local_df = 1; 1095 skb->local_df = 1;
875 1096
876 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 1097 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
877 1098
878 LeaveFunction(10); 1099 LeaveFunction(10);
879 return NF_STOLEN; 1100 return NF_STOLEN;
@@ -899,6 +1120,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
899 struct rtable *rt; /* Route to the other host */ 1120 struct rtable *rt; /* Route to the other host */
900 int mtu; 1121 int mtu;
901 int rc; 1122 int rc;
1123 int local;
902 1124
903 EnterFunction(10); 1125 EnterFunction(10);
904 1126
@@ -919,16 +1141,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
919 * mangle and send the packet here (only for VS/NAT) 1141 * mangle and send the packet here (only for VS/NAT)
920 */ 1142 */
921 1143
922 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) 1144 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1145 RT_TOS(ip_hdr(skb)->tos), 1|2|4)))
923 goto tx_error_icmp; 1146 goto tx_error_icmp;
1147 local = rt->rt_flags & RTCF_LOCAL;
1148
1149 /*
1150 * Avoid duplicate tuple in reply direction for NAT traffic
1151 * to local address when connection is sync-ed
1152 */
1153#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1154 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1155 enum ip_conntrack_info ctinfo;
1156 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1157
1158 if (ct && !nf_ct_is_untracked(ct)) {
1159 IP_VS_DBG(10, "%s(): "
1160 "stopping DNAT to local address %pI4\n",
1161 __func__, &cp->daddr.ip);
1162 goto tx_error_put;
1163 }
1164 }
1165#endif
1166
1167 /* From world but DNAT to loopback address? */
1168 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
1169 IP_VS_DBG(1, "%s(): "
1170 "stopping DNAT to loopback %pI4\n",
1171 __func__, &cp->daddr.ip);
1172 goto tx_error_put;
1173 }
924 1174
925 /* MTU checking */ 1175 /* MTU checking */
926 mtu = dst_mtu(&rt->dst); 1176 mtu = dst_mtu(&rt->dst);
927 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1177 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
928 ip_rt_put(rt);
929 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1178 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
930 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1179 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
931 goto tx_error; 1180 goto tx_error_put;
932 } 1181 }
933 1182
934 /* copy-on-write the packet before mangling it */ 1183 /* copy-on-write the packet before mangling it */
@@ -938,16 +1187,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
938 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1187 if (skb_cow(skb, rt->dst.dev->hard_header_len))
939 goto tx_error_put; 1188 goto tx_error_put;
940 1189
941 /* drop the old route when skb is not shared */
942 skb_dst_drop(skb);
943 skb_dst_set(skb, &rt->dst);
944
945 ip_vs_nat_icmp(skb, pp, cp, 0); 1190 ip_vs_nat_icmp(skb, pp, cp, 0);
946 1191
1192 if (!local) {
1193 /* drop the old route when skb is not shared */
1194 skb_dst_drop(skb);
1195 skb_dst_set(skb, &rt->dst);
1196 } else {
1197 ip_rt_put(rt);
1198 /*
1199 * Some IPv4 replies get local address from routes,
1200 * not from iph, so while we DNAT after routing
1201 * we need this second input/output route.
1202 */
1203 if (!__ip_vs_reroute_locally(skb))
1204 goto tx_error;
1205 }
1206
947 /* Another hack: avoid icmp_send in ip_fragment */ 1207 /* Another hack: avoid icmp_send in ip_fragment */
948 skb->local_df = 1; 1208 skb->local_df = 1;
949 1209
950 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 1210 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
951 1211
952 rc = NF_STOLEN; 1212 rc = NF_STOLEN;
953 goto out; 1213 goto out;
@@ -973,6 +1233,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
973 struct rt6_info *rt; /* Route to the other host */ 1233 struct rt6_info *rt; /* Route to the other host */
974 int mtu; 1234 int mtu;
975 int rc; 1235 int rc;
1236 int local;
976 1237
977 EnterFunction(10); 1238 EnterFunction(10);
978 1239
@@ -993,17 +1254,49 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
993 * mangle and send the packet here (only for VS/NAT) 1254 * mangle and send the packet here (only for VS/NAT)
994 */ 1255 */
995 1256
996 rt = __ip_vs_get_out_rt_v6(cp); 1257 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
997 if (!rt) 1258 0, 1|2|4)))
998 goto tx_error_icmp; 1259 goto tx_error_icmp;
999 1260
1261 local = __ip_vs_is_local_route6(rt);
1262 /*
1263 * Avoid duplicate tuple in reply direction for NAT traffic
1264 * to local address when connection is sync-ed
1265 */
1266#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1267 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1268 enum ip_conntrack_info ctinfo;
1269 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1270
1271 if (ct && !nf_ct_is_untracked(ct)) {
1272 IP_VS_DBG(10, "%s(): "
1273 "stopping DNAT to local address %pI6\n",
1274 __func__, &cp->daddr.in6);
1275 goto tx_error_put;
1276 }
1277 }
1278#endif
1279
1280 /* From world but DNAT to loopback address? */
1281 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1282 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1283 IP_VS_DBG(1, "%s(): "
1284 "stopping DNAT to loopback %pI6\n",
1285 __func__, &cp->daddr.in6);
1286 goto tx_error_put;
1287 }
1288
1000 /* MTU checking */ 1289 /* MTU checking */
1001 mtu = dst_mtu(&rt->dst); 1290 mtu = dst_mtu(&rt->dst);
1002 if (skb->len > mtu) { 1291 if (skb->len > mtu) {
1003 dst_release(&rt->dst); 1292 if (!skb->dev) {
1293 struct net *net = dev_net(skb_dst(skb)->dev);
1294
1295 skb->dev = net->loopback_dev;
1296 }
1004 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1297 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1005 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1298 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1006 goto tx_error; 1299 goto tx_error_put;
1007 } 1300 }
1008 1301
1009 /* copy-on-write the packet before mangling it */ 1302 /* copy-on-write the packet before mangling it */
@@ -1013,16 +1306,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1013 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1306 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1014 goto tx_error_put; 1307 goto tx_error_put;
1015 1308
1016 /* drop the old route when skb is not shared */
1017 skb_dst_drop(skb);
1018 skb_dst_set(skb, &rt->dst);
1019
1020 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1309 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1021 1310
1311 if (!local || !skb->dev) {
1312 /* drop the old route when skb is not shared */
1313 skb_dst_drop(skb);
1314 skb_dst_set(skb, &rt->dst);
1315 } else {
1316 /* destined to loopback, do we need to change route? */
1317 dst_release(&rt->dst);
1318 }
1319
1022 /* Another hack: avoid icmp_send in ip_fragment */ 1320 /* Another hack: avoid icmp_send in ip_fragment */
1023 skb->local_df = 1; 1321 skb->local_df = 1;
1024 1322
1025 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 1323 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1026 1324
1027 rc = NF_STOLEN; 1325 rc = NF_STOLEN;
1028 goto out; 1326 goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df3eedb142ff..27a5ea6b6a0f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -65,32 +65,42 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
65DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); 65DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
66EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); 66EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
67 67
68static int nf_conntrack_hash_rnd_initted; 68static unsigned int nf_conntrack_hash_rnd __read_mostly;
69static unsigned int nf_conntrack_hash_rnd;
70 69
71static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 70static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
72 u16 zone, unsigned int size, unsigned int rnd)
73{ 71{
74 unsigned int n; 72 unsigned int n;
75 u_int32_t h;
76 73
77 /* The direction must be ignored, so we hash everything up to the 74 /* The direction must be ignored, so we hash everything up to the
78 * destination ports (which is a multiple of 4) and treat the last 75 * destination ports (which is a multiple of 4) and treat the last
79 * three bytes manually. 76 * three bytes manually.
80 */ 77 */
81 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 78 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
82 h = jhash2((u32 *)tuple, n, 79 return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
83 zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | 80 (((__force __u16)tuple->dst.u.all << 16) |
84 tuple->dst.protonum)); 81 tuple->dst.protonum));
82}
83
84static u32 __hash_bucket(u32 hash, unsigned int size)
85{
86 return ((u64)hash * size) >> 32;
87}
88
89static u32 hash_bucket(u32 hash, const struct net *net)
90{
91 return __hash_bucket(hash, net->ct.htable_size);
92}
85 93
86 return ((u64)h * size) >> 32; 94static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
95 u16 zone, unsigned int size)
96{
97 return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
87} 98}
88 99
89static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, 100static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
90 const struct nf_conntrack_tuple *tuple) 101 const struct nf_conntrack_tuple *tuple)
91{ 102{
92 return __hash_conntrack(tuple, zone, net->ct.htable_size, 103 return __hash_conntrack(tuple, zone, net->ct.htable_size);
93 nf_conntrack_hash_rnd);
94} 104}
95 105
96bool 106bool
@@ -292,20 +302,20 @@ static void death_by_timeout(unsigned long ul_conntrack)
292 * OR 302 * OR
293 * - Caller must lock nf_conntrack_lock before calling this function 303 * - Caller must lock nf_conntrack_lock before calling this function
294 */ 304 */
295struct nf_conntrack_tuple_hash * 305static struct nf_conntrack_tuple_hash *
296__nf_conntrack_find(struct net *net, u16 zone, 306____nf_conntrack_find(struct net *net, u16 zone,
297 const struct nf_conntrack_tuple *tuple) 307 const struct nf_conntrack_tuple *tuple, u32 hash)
298{ 308{
299 struct nf_conntrack_tuple_hash *h; 309 struct nf_conntrack_tuple_hash *h;
300 struct hlist_nulls_node *n; 310 struct hlist_nulls_node *n;
301 unsigned int hash = hash_conntrack(net, zone, tuple); 311 unsigned int bucket = hash_bucket(hash, net);
302 312
303 /* Disable BHs the entire time since we normally need to disable them 313 /* Disable BHs the entire time since we normally need to disable them
304 * at least once for the stats anyway. 314 * at least once for the stats anyway.
305 */ 315 */
306 local_bh_disable(); 316 local_bh_disable();
307begin: 317begin:
308 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { 318 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
309 if (nf_ct_tuple_equal(tuple, &h->tuple) && 319 if (nf_ct_tuple_equal(tuple, &h->tuple) &&
310 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { 320 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
311 NF_CT_STAT_INC(net, found); 321 NF_CT_STAT_INC(net, found);
@@ -319,7 +329,7 @@ begin:
319 * not the expected one, we must restart lookup. 329 * not the expected one, we must restart lookup.
320 * We probably met an item that was moved to another chain. 330 * We probably met an item that was moved to another chain.
321 */ 331 */
322 if (get_nulls_value(n) != hash) { 332 if (get_nulls_value(n) != bucket) {
323 NF_CT_STAT_INC(net, search_restart); 333 NF_CT_STAT_INC(net, search_restart);
324 goto begin; 334 goto begin;
325 } 335 }
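Editor's note: the restart test exists because the lookup runs locklessly under RCU, where a freed entry can be recycled into a different chain mid-walk. Each chain ends in a "nulls" marker encoding its bucket, so reaching a terminator for the wrong bucket means the walk migrated and must restart. A sketch of the encoding, assuming the low bit tags a marker (this is the idea, not the kernel's hlist_nulls implementation):

/* Sketch of the hlist_nulls idea: an odd terminator value encodes the
 * bucket number, letting a migrated lookup detect itself and restart. */
#include <stdint.h>
#include <stdio.h>

static inline uintptr_t make_nulls(unsigned bucket) { return (bucket << 1) | 1; }
static inline int is_nulls(uintptr_t p)             { return p & 1; }
static inline unsigned nulls_value(uintptr_t p)     { return p >> 1; }

int main(void)
{
	unsigned bucket = 42;
	uintptr_t end = make_nulls(bucket);

	if (is_nulls(end) && nulls_value(end) != bucket)
		puts("moved to another chain: restart lookup");
	else
		puts("terminator matches bucket: lookup is complete");
	return 0;
}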
@@ -327,19 +337,27 @@ begin:
327 337
328 return NULL; 338 return NULL;
329} 339}
340
341struct nf_conntrack_tuple_hash *
342__nf_conntrack_find(struct net *net, u16 zone,
343 const struct nf_conntrack_tuple *tuple)
344{
345 return ____nf_conntrack_find(net, zone, tuple,
346 hash_conntrack_raw(tuple, zone));
347}
330EXPORT_SYMBOL_GPL(__nf_conntrack_find); 348EXPORT_SYMBOL_GPL(__nf_conntrack_find);
331 349
332/* Find a connection corresponding to a tuple. */ 350/* Find a connection corresponding to a tuple. */
333struct nf_conntrack_tuple_hash * 351static struct nf_conntrack_tuple_hash *
334nf_conntrack_find_get(struct net *net, u16 zone, 352__nf_conntrack_find_get(struct net *net, u16 zone,
335 const struct nf_conntrack_tuple *tuple) 353 const struct nf_conntrack_tuple *tuple, u32 hash)
336{ 354{
337 struct nf_conntrack_tuple_hash *h; 355 struct nf_conntrack_tuple_hash *h;
338 struct nf_conn *ct; 356 struct nf_conn *ct;
339 357
340 rcu_read_lock(); 358 rcu_read_lock();
341begin: 359begin:
342 h = __nf_conntrack_find(net, zone, tuple); 360 h = ____nf_conntrack_find(net, zone, tuple, hash);
343 if (h) { 361 if (h) {
344 ct = nf_ct_tuplehash_to_ctrack(h); 362 ct = nf_ct_tuplehash_to_ctrack(h);
345 if (unlikely(nf_ct_is_dying(ct) || 363 if (unlikely(nf_ct_is_dying(ct) ||
@@ -357,6 +375,14 @@ begin:
357 375
358 return h; 376 return h;
359} 377}
378
379struct nf_conntrack_tuple_hash *
380nf_conntrack_find_get(struct net *net, u16 zone,
381 const struct nf_conntrack_tuple *tuple)
382{
383 return __nf_conntrack_find_get(net, zone, tuple,
384 hash_conntrack_raw(tuple, zone));
385}
360EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 386EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
361 387
362static void __nf_conntrack_hash_insert(struct nf_conn *ct, 388static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -409,8 +435,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
409 return NF_ACCEPT; 435 return NF_ACCEPT;
410 436
411 zone = nf_ct_zone(ct); 437 zone = nf_ct_zone(ct);
412 hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 438 /* reuse the hash saved before */
413 repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 439 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
440 hash = hash_bucket(hash, net);
441 repl_hash = hash_conntrack(net, zone,
442 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
414 443
415 /* We're not in hash table, and we refuse to set up related 444 /* We're not in hash table, and we refuse to set up related
416 connections for unconfirmed conns. But packet copies and 445 connections for unconfirmed conns. But packet copies and
@@ -567,17 +596,29 @@ static noinline int early_drop(struct net *net, unsigned int hash)
567 return dropped; 596 return dropped;
568} 597}
569 598
570struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, 599static struct nf_conn *
571 const struct nf_conntrack_tuple *orig, 600__nf_conntrack_alloc(struct net *net, u16 zone,
572 const struct nf_conntrack_tuple *repl, 601 const struct nf_conntrack_tuple *orig,
573 gfp_t gfp) 602 const struct nf_conntrack_tuple *repl,
603 gfp_t gfp, u32 hash)
574{ 604{
575 struct nf_conn *ct; 605 struct nf_conn *ct;
576 606
577 if (unlikely(!nf_conntrack_hash_rnd_initted)) { 607 if (unlikely(!nf_conntrack_hash_rnd)) {
578 get_random_bytes(&nf_conntrack_hash_rnd, 608 unsigned int rand;
579 sizeof(nf_conntrack_hash_rnd)); 609
580 nf_conntrack_hash_rnd_initted = 1; 610 /*
611 * Why not initialize nf_conntrack_hash_rnd in an init() function?
612 * Because there isn't enough entropy when the system is initializing,
613 * so we initialize it as late as possible.
614 */
615 do {
616 get_random_bytes(&rand, sizeof(rand));
617 } while (!rand);
618 cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
619
620 /* recompute the hash as nf_conntrack_hash_rnd is initialized */
621 hash = hash_conntrack_raw(orig, zone);
581 } 622 }
582 623
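Editor's note: seeding is deferred to first use because boot-time entropy is poor. Racing initializers each generate a nonzero candidate, cmpxchg() lets exactly one of them win, and the caller then recomputes the hash with the published seed. A C11 userspace analogue, with rand() standing in for get_random_bytes() (it is not cryptographically meaningful):

/* C11 sketch of first-use seed initialization with a compare-and-swap. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic unsigned int hash_rnd;

static unsigned int get_hash_rnd(void)
{
	unsigned int cur = atomic_load(&hash_rnd);

	if (!cur) {
		unsigned int rnd, expected = 0;

		do {
			rnd = (unsigned int)rand();
		} while (!rnd);	/* zero means "uninitialized" */

		/* only one racing caller's value is published */
		atomic_compare_exchange_strong(&hash_rnd, &expected, rnd);
		cur = atomic_load(&hash_rnd);
	}
	return cur;
}

int main(void)
{
	srand(1);
	printf("seed = %u (stable: %u)\n", get_hash_rnd(), get_hash_rnd());
	return 0;
}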
583 /* We don't want any race condition at early drop stage */ 624 /* We don't want any race condition at early drop stage */
@@ -585,8 +626,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
585 626
586 if (nf_conntrack_max && 627 if (nf_conntrack_max &&
587 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { 628 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
588 unsigned int hash = hash_conntrack(net, zone, orig); 629 if (!early_drop(net, hash_bucket(hash, net))) {
589 if (!early_drop(net, hash)) {
590 atomic_dec(&net->ct.count); 630 atomic_dec(&net->ct.count);
591 if (net_ratelimit()) 631 if (net_ratelimit())
592 printk(KERN_WARNING 632 printk(KERN_WARNING
@@ -616,7 +656,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
616 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 656 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
617 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 657 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
618 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 658 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
619 ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; 659 /* save hash for reusing when confirming */
660 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
620 /* Don't set timer yet: wait for confirmation */ 661 /* Don't set timer yet: wait for confirmation */
621 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); 662 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
622 write_pnet(&ct->ct_net, net); 663 write_pnet(&ct->ct_net, net);
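Editor's note: between allocation and confirmation the reply tuplehash sits on no list, so its hnnode.pprev slot is dead storage. The patch parks the precomputed hash there and __nf_conntrack_confirm() reads it back (see the "reuse the hash saved before" hunk above), saving one rehash per new connection. A hedged sketch of the trick with a toy node type:

/* Sketch of reusing a not-yet-linked pointer field as scalar storage;
 * valid only while the node is guaranteed to be unlinked. */
#include <stdio.h>

struct node { struct node *next; struct node **pprev; };

static void stash_hash(struct node *n, unsigned long hash)
{
	*(unsigned long *)&n->pprev = hash;	/* node must be unlinked */
}

static unsigned long fetch_hash(struct node *n)
{
	return *(unsigned long *)&n->pprev;
}

int main(void)
{
	struct node n = { 0 };

	stash_hash(&n, 0xdeadbeefUL);
	printf("saved hash = %lx\n", fetch_hash(&n));
	/* the field must be rewritten as a real pointer before the
	 * node is ever inserted into a list */
	return 0;
}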
@@ -643,6 +684,14 @@ out_free:
643 return ERR_PTR(-ENOMEM); 684 return ERR_PTR(-ENOMEM);
644#endif 685#endif
645} 686}
687
688struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
689 const struct nf_conntrack_tuple *orig,
690 const struct nf_conntrack_tuple *repl,
691 gfp_t gfp)
692{
693 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
694}
646EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 695EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
647 696
648void nf_conntrack_free(struct nf_conn *ct) 697void nf_conntrack_free(struct nf_conn *ct)
@@ -664,7 +713,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
664 struct nf_conntrack_l3proto *l3proto, 713 struct nf_conntrack_l3proto *l3proto,
665 struct nf_conntrack_l4proto *l4proto, 714 struct nf_conntrack_l4proto *l4proto,
666 struct sk_buff *skb, 715 struct sk_buff *skb,
667 unsigned int dataoff) 716 unsigned int dataoff, u32 hash)
668{ 717{
669 struct nf_conn *ct; 718 struct nf_conn *ct;
670 struct nf_conn_help *help; 719 struct nf_conn_help *help;
@@ -678,7 +727,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
678 return NULL; 727 return NULL;
679 } 728 }
680 729
681 ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); 730 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
731 hash);
682 if (IS_ERR(ct)) { 732 if (IS_ERR(ct)) {
683 pr_debug("Can't allocate conntrack.\n"); 733 pr_debug("Can't allocate conntrack.\n");
684 return (struct nf_conntrack_tuple_hash *)ct; 734 return (struct nf_conntrack_tuple_hash *)ct;
@@ -755,6 +805,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
755 struct nf_conntrack_tuple_hash *h; 805 struct nf_conntrack_tuple_hash *h;
756 struct nf_conn *ct; 806 struct nf_conn *ct;
757 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; 807 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
808 u32 hash;
758 809
759 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 810 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
760 dataoff, l3num, protonum, &tuple, l3proto, 811 dataoff, l3num, protonum, &tuple, l3proto,
@@ -764,10 +815,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
764 } 815 }
765 816
766 /* look for tuple match */ 817 /* look for tuple match */
767 h = nf_conntrack_find_get(net, zone, &tuple); 818 hash = hash_conntrack_raw(&tuple, zone);
819 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
768 if (!h) { 820 if (!h) {
769 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 821 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
770 skb, dataoff); 822 skb, dataoff, hash);
771 if (!h) 823 if (!h)
772 return NULL; 824 return NULL;
773 if (IS_ERR(h)) 825 if (IS_ERR(h))
@@ -1260,7 +1312,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
1260 if (!hash) { 1312 if (!hash) {
1261 *vmalloced = 1; 1313 *vmalloced = 1;
1262 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1314 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1263 hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 1315 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1316 PAGE_KERNEL);
1264 } 1317 }
1265 1318
1266 if (hash && nulls) 1319 if (hash && nulls)
@@ -1307,8 +1360,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1307 ct = nf_ct_tuplehash_to_ctrack(h); 1360 ct = nf_ct_tuplehash_to_ctrack(h);
1308 hlist_nulls_del_rcu(&h->hnnode); 1361 hlist_nulls_del_rcu(&h->hnnode);
1309 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), 1362 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
1310 hashsize, 1363 hashsize);
1311 nf_conntrack_hash_rnd);
1312 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 1364 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1313 } 1365 }
1314 } 1366 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index acb29ccaa41f..46e8966912b1 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -38,25 +38,30 @@ static int nf_ct_expect_hash_rnd_initted __read_mostly;
38 38
39static struct kmem_cache *nf_ct_expect_cachep __read_mostly; 39static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
40 40
41static HLIST_HEAD(nf_ct_userspace_expect_list);
42
41/* nf_conntrack_expect helper functions */ 43/* nf_conntrack_expect helper functions */
42void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) 44void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
45 u32 pid, int report)
43{ 46{
44 struct nf_conn_help *master_help = nfct_help(exp->master); 47 struct nf_conn_help *master_help = nfct_help(exp->master);
45 struct net *net = nf_ct_exp_net(exp); 48 struct net *net = nf_ct_exp_net(exp);
46 49
47 NF_CT_ASSERT(master_help);
48 NF_CT_ASSERT(!timer_pending(&exp->timeout)); 50 NF_CT_ASSERT(!timer_pending(&exp->timeout));
49 51
50 hlist_del_rcu(&exp->hnode); 52 hlist_del_rcu(&exp->hnode);
51 net->ct.expect_count--; 53 net->ct.expect_count--;
52 54
53 hlist_del(&exp->lnode); 55 hlist_del(&exp->lnode);
54 master_help->expecting[exp->class]--; 56 if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
57 master_help->expecting[exp->class]--;
58
59 nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
55 nf_ct_expect_put(exp); 60 nf_ct_expect_put(exp);
56 61
57 NF_CT_STAT_INC(net, expect_delete); 62 NF_CT_STAT_INC(net, expect_delete);
58} 63}
59EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); 64EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
60 65
61static void nf_ct_expectation_timed_out(unsigned long ul_expect) 66static void nf_ct_expectation_timed_out(unsigned long ul_expect)
62{ 67{
@@ -320,16 +325,21 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
320 325
321 atomic_inc(&exp->use); 326 atomic_inc(&exp->use);
322 327
323 hlist_add_head(&exp->lnode, &master_help->expectations); 328 if (master_help) {
324 master_help->expecting[exp->class]++; 329 hlist_add_head(&exp->lnode, &master_help->expectations);
330 master_help->expecting[exp->class]++;
331 } else if (exp->flags & NF_CT_EXPECT_USERSPACE)
332 hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
325 333
326 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); 334 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
327 net->ct.expect_count++; 335 net->ct.expect_count++;
328 336
329 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 337 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
330 (unsigned long)exp); 338 (unsigned long)exp);
331 p = &master_help->helper->expect_policy[exp->class]; 339 if (master_help) {
332 exp->timeout.expires = jiffies + p->timeout * HZ; 340 p = &master_help->helper->expect_policy[exp->class];
341 exp->timeout.expires = jiffies + p->timeout * HZ;
342 }
333 add_timer(&exp->timeout); 343 add_timer(&exp->timeout);
334 344
335 atomic_inc(&exp->use); 345 atomic_inc(&exp->use);
@@ -380,7 +390,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
380 unsigned int h; 390 unsigned int h;
381 int ret = 1; 391 int ret = 1;
382 392
383 if (!master_help->helper) { 393 /* Don't allow expectations created from kernel-space with no helper */
394 if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
 395 (!master_help || !master_help->helper)) {
384 ret = -ESHUTDOWN; 396 ret = -ESHUTDOWN;
385 goto out; 397 goto out;
386 } 398 }
@@ -398,13 +410,16 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
398 } 410 }
399 } 411 }
400 /* Will be over limit? */ 412 /* Will be over limit? */
401 p = &master_help->helper->expect_policy[expect->class]; 413 if (master_help) {
402 if (p->max_expected && 414 p = &master_help->helper->expect_policy[expect->class];
403 master_help->expecting[expect->class] >= p->max_expected) { 415 if (p->max_expected &&
404 evict_oldest_expect(master, expect); 416 master_help->expecting[expect->class] >= p->max_expected) {
405 if (master_help->expecting[expect->class] >= p->max_expected) { 417 evict_oldest_expect(master, expect);
406 ret = -EMFILE; 418 if (master_help->expecting[expect->class]
407 goto out; 419 >= p->max_expected) {
420 ret = -EMFILE;
421 goto out;
422 }
408 } 423 }
409 } 424 }
410 425
@@ -439,6 +454,21 @@ out:
439} 454}
440EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); 455EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
441 456
457void nf_ct_remove_userspace_expectations(void)
458{
459 struct nf_conntrack_expect *exp;
460 struct hlist_node *n, *next;
461
462 hlist_for_each_entry_safe(exp, n, next,
463 &nf_ct_userspace_expect_list, lnode) {
464 if (del_timer(&exp->timeout)) {
465 nf_ct_unlink_expect(exp);
466 nf_ct_expect_put(exp);
467 }
468 }
469}
470EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
471
442#ifdef CONFIG_PROC_FS 472#ifdef CONFIG_PROC_FS
443struct ct_expect_iter_state { 473struct ct_expect_iter_state {
444 struct seq_net_private p; 474 struct seq_net_private p;
@@ -529,8 +559,12 @@ static int exp_seq_show(struct seq_file *s, void *v)
529 seq_printf(s, "PERMANENT"); 559 seq_printf(s, "PERMANENT");
530 delim = ","; 560 delim = ",";
531 } 561 }
532 if (expect->flags & NF_CT_EXPECT_INACTIVE) 562 if (expect->flags & NF_CT_EXPECT_INACTIVE) {
533 seq_printf(s, "%sINACTIVE", delim); 563 seq_printf(s, "%sINACTIVE", delim);
564 delim = ",";
565 }
566 if (expect->flags & NF_CT_EXPECT_USERSPACE)
567 seq_printf(s, "%sUSERSPACE", delim);
534 568
535 helper = rcu_dereference(nfct_help(expect->master)->helper); 569 helper = rcu_dereference(nfct_help(expect->master)->helper);
536 if (helper) { 570 if (helper) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 146476c6441a..b729ace1dcc1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1588,8 +1588,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1588 const struct nf_conntrack_expect *exp) 1588 const struct nf_conntrack_expect *exp)
1589{ 1589{
1590 struct nf_conn *master = exp->master; 1590 struct nf_conn *master = exp->master;
1591 struct nf_conntrack_helper *helper;
1592 long timeout = (exp->timeout.expires - jiffies) / HZ; 1591 long timeout = (exp->timeout.expires - jiffies) / HZ;
1592 struct nf_conn_help *help;
1593 1593
1594 if (timeout < 0) 1594 if (timeout < 0)
1595 timeout = 0; 1595 timeout = 0;
@@ -1605,9 +1605,15 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1605 1605
1606 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); 1606 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
1607 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); 1607 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
1608 helper = rcu_dereference(nfct_help(master)->helper); 1608 NLA_PUT_BE32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags));
1609 if (helper) 1609 help = nfct_help(master);
1610 NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); 1610 if (help) {
1611 struct nf_conntrack_helper *helper;
1612
1613 helper = rcu_dereference(help->helper);
1614 if (helper)
1615 NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name);
1616 }
1611 1617
1612 return 0; 1618 return 0;
1613 1619
@@ -1654,17 +1660,20 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1654 struct nlmsghdr *nlh; 1660 struct nlmsghdr *nlh;
1655 struct nfgenmsg *nfmsg; 1661 struct nfgenmsg *nfmsg;
1656 struct sk_buff *skb; 1662 struct sk_buff *skb;
1657 unsigned int type; 1663 unsigned int type, group;
1658 int flags = 0; 1664 int flags = 0;
1659 1665
1660 if (events & (1 << IPEXP_NEW)) { 1666 if (events & (1 << IPEXP_DESTROY)) {
1667 type = IPCTNL_MSG_EXP_DELETE;
1668 group = NFNLGRP_CONNTRACK_EXP_DESTROY;
1669 } else if (events & (1 << IPEXP_NEW)) {
1661 type = IPCTNL_MSG_EXP_NEW; 1670 type = IPCTNL_MSG_EXP_NEW;
1662 flags = NLM_F_CREATE|NLM_F_EXCL; 1671 flags = NLM_F_CREATE|NLM_F_EXCL;
1672 group = NFNLGRP_CONNTRACK_EXP_NEW;
1663 } else 1673 } else
1664 return 0; 1674 return 0;
1665 1675
1666 if (!item->report && 1676 if (!item->report && !nfnetlink_has_listeners(net, group))
1667 !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW))
1668 return 0; 1677 return 0;
1669 1678
1670 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1679 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -1687,8 +1696,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1687 rcu_read_unlock(); 1696 rcu_read_unlock();
1688 1697
1689 nlmsg_end(skb, nlh); 1698 nlmsg_end(skb, nlh);
1690 nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, 1699 nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC);
1691 item->report, GFP_ATOMIC);
1692 return 0; 1700 return 0;
1693 1701
1694nla_put_failure: 1702nla_put_failure:
@@ -1761,6 +1769,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
1761 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, 1769 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
1762 [CTA_EXPECT_ID] = { .type = NLA_U32 }, 1770 [CTA_EXPECT_ID] = { .type = NLA_U32 },
1763 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, 1771 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING },
1772 [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
1773 [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
1764}; 1774};
1765 1775
1766static int 1776static int
@@ -1869,7 +1879,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1869 } 1879 }
1870 1880
1871 /* after list removal, usage count == 1 */ 1881 /* after list removal, usage count == 1 */
1872 nf_ct_unexpect_related(exp); 1882 spin_lock_bh(&nf_conntrack_lock);
1883 if (del_timer(&exp->timeout)) {
1884 nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid,
1885 nlmsg_report(nlh));
1886 nf_ct_expect_put(exp);
1887 }
1888 spin_unlock_bh(&nf_conntrack_lock);
1873 /* have to put what we 'get' above. 1889 /* have to put what we 'get' above.
1874 * after this line usage count == 0 */ 1890 * after this line usage count == 0 */
1875 nf_ct_expect_put(exp); 1891 nf_ct_expect_put(exp);
@@ -1886,7 +1902,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1886 m_help = nfct_help(exp->master); 1902 m_help = nfct_help(exp->master);
1887 if (!strcmp(m_help->helper->name, name) && 1903 if (!strcmp(m_help->helper->name, name) &&
1888 del_timer(&exp->timeout)) { 1904 del_timer(&exp->timeout)) {
1889 nf_ct_unlink_expect(exp); 1905 nf_ct_unlink_expect_report(exp,
1906 NETLINK_CB(skb).pid,
1907 nlmsg_report(nlh));
1890 nf_ct_expect_put(exp); 1908 nf_ct_expect_put(exp);
1891 } 1909 }
1892 } 1910 }
@@ -1900,7 +1918,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1900 &net->ct.expect_hash[i], 1918 &net->ct.expect_hash[i],
1901 hnode) { 1919 hnode) {
1902 if (del_timer(&exp->timeout)) { 1920 if (del_timer(&exp->timeout)) {
1903 nf_ct_unlink_expect(exp); 1921 nf_ct_unlink_expect_report(exp,
1922 NETLINK_CB(skb).pid,
1923 nlmsg_report(nlh));
1904 nf_ct_expect_put(exp); 1924 nf_ct_expect_put(exp);
1905 } 1925 }
1906 } 1926 }
@@ -1946,23 +1966,35 @@ ctnetlink_create_expect(struct net *net, u16 zone,
1946 if (!h) 1966 if (!h)
1947 return -ENOENT; 1967 return -ENOENT;
1948 ct = nf_ct_tuplehash_to_ctrack(h); 1968 ct = nf_ct_tuplehash_to_ctrack(h);
1949 help = nfct_help(ct);
1950
1951 if (!help || !help->helper) {
1952 /* such conntrack hasn't got any helper, abort */
1953 err = -EOPNOTSUPP;
1954 goto out;
1955 }
1956
1957 exp = nf_ct_expect_alloc(ct); 1969 exp = nf_ct_expect_alloc(ct);
1958 if (!exp) { 1970 if (!exp) {
1959 err = -ENOMEM; 1971 err = -ENOMEM;
1960 goto out; 1972 goto out;
1961 } 1973 }
1974 help = nfct_help(ct);
1975 if (!help) {
1976 if (!cda[CTA_EXPECT_TIMEOUT]) {
1977 err = -EINVAL;
1978 goto out;
1979 }
1980 exp->timeout.expires =
1981 jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
1982
1983 exp->flags = NF_CT_EXPECT_USERSPACE;
1984 if (cda[CTA_EXPECT_FLAGS]) {
1985 exp->flags |=
1986 ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
1987 }
1988 } else {
1989 if (cda[CTA_EXPECT_FLAGS]) {
1990 exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
1991 exp->flags &= ~NF_CT_EXPECT_USERSPACE;
1992 } else
1993 exp->flags = 0;
1994 }
1962 1995
1963 exp->class = 0; 1996 exp->class = 0;
1964 exp->expectfn = NULL; 1997 exp->expectfn = NULL;
1965 exp->flags = 0;
1966 exp->master = ct; 1998 exp->master = ct;
1967 exp->helper = NULL; 1999 exp->helper = NULL;
1968 memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); 2000 memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
@@ -2130,6 +2162,7 @@ static void __exit ctnetlink_exit(void)
2130{ 2162{
2131 pr_info("ctnetlink: unregistering from nfnetlink.\n"); 2163 pr_info("ctnetlink: unregistering from nfnetlink.\n");
2132 2164
2165 nf_ct_remove_userspace_expectations();
2133#ifdef CONFIG_NF_CONNTRACK_EVENTS 2166#ifdef CONFIG_NF_CONNTRACK_EVENTS
2134 nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); 2167 nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
2135 nf_conntrack_unregister_notifier(&ctnl_notifier); 2168 nf_conntrack_unregister_notifier(&ctnl_notifier);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index ed6d92958023..dc7bb74110df 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -292,6 +292,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
292 292
293 for (i = 0; i < MAX_NF_CT_PROTO; i++) 293 for (i = 0; i < MAX_NF_CT_PROTO; i++)
294 proto_array[i] = &nf_conntrack_l4proto_generic; 294 proto_array[i] = &nf_conntrack_l4proto_generic;
295
296 /* Before making proto_array visible to lockless readers,
297 * we must make sure its content is committed to memory.
298 */
299 smp_wmb();
300
295 nf_ct_protos[l4proto->l3proto] = proto_array; 301 nf_ct_protos[l4proto->l3proto] = proto_array;
296 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != 302 } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
297 &nf_conntrack_l4proto_generic) { 303 &nf_conntrack_l4proto_generic) {
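Editor's note: the smp_wmb() added in this hunk orders the generic-protocol fills before the proto_array pointer becomes visible to lockless readers, exactly as its comment says. The C11 analogue pairs a release store on the publishing side with an acquire load on the reading side:

/* C11 analogue of publish-after-init: the release store makes the
 * slot fills visible to any reader that observes the pointer. */
#include <stdatomic.h>
#include <stdio.h>

#define NSLOTS 4

static int slots[NSLOTS];
static _Atomic(int *) table;

static void publish(void)
{
	for (int i = 0; i < NSLOTS; i++)
		slots[i] = i + 1;			/* initialize content */
	atomic_store_explicit(&table, slots,
			      memory_order_release);	/* then publish */
}

static void reader(void)
{
	int *t = atomic_load_explicit(&table, memory_order_acquire);

	if (t)	/* seeing the pointer implies seeing the filled slots */
		printf("slot0 = %d\n", t[0]);
}

int main(void) { publish(); reader(); return 0; }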
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index c4c885dca3bd..3fb2b73b24dc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -329,8 +329,8 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph)
329/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering 329/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
330 in IP Filter' by Guido van Rooij. 330 in IP Filter' by Guido van Rooij.
331 331
332 http://www.nluug.nl/events/sane2000/papers.html 332 http://www.sane.nl/events/sane2000/papers.html
333 http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz 333 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
334 334
335 The boundaries and the conditions are changed according to RFC793: 335 The boundaries and the conditions are changed according to RFC793:
336 the packet must intersect the window (i.e. segments may be 336 the packet must intersect the window (i.e. segments may be
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index f64de9544866..bcf47eb518ef 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -130,6 +130,44 @@ static int digits_len(const struct nf_conn *ct, const char *dptr,
130 return len; 130 return len;
131} 131}
132 132
133static int iswordc(const char c)
134{
135 if (isalnum(c) || c == '!' || c == '"' || c == '%' ||
136 (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
137 c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
138 c == '{' || c == '}' || c == '~')
139 return 1;
140 return 0;
141}
142
143static int word_len(const char *dptr, const char *limit)
144{
145 int len = 0;
146 while (dptr < limit && iswordc(*dptr)) {
147 dptr++;
148 len++;
149 }
150 return len;
151}
152
153static int callid_len(const struct nf_conn *ct, const char *dptr,
154 const char *limit, int *shift)
155{
156 int len, domain_len;
157
158 len = word_len(dptr, limit);
159 dptr += len;
160 if (!len || dptr == limit || *dptr != '@')
161 return len;
162 dptr++;
163 len++;
164
165 domain_len = word_len(dptr, limit);
166 if (!domain_len)
167 return 0;
168 return len + domain_len;
169}
170
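Editor's note: callid_len() accepts an RFC 3261 word token with an optional @host part and rejects a dangling "word@". A userspace port of the same scanners with a quick test; it mirrors the patch logic, not the full RFC grammar:

/* Userspace port of the word/callid scanners above, with a test. */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

static int iswordc(const char c)
{
	if (isalnum((unsigned char)c) || c == '!' || c == '"' || c == '%' ||
	    (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
	    c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
	    c == '{' || c == '}' || c == '~')
		return 1;
	return 0;
}

static int word_len(const char *dptr, const char *limit)
{
	int len = 0;
	while (dptr < limit && iswordc(*dptr)) {
		dptr++;
		len++;
	}
	return len;
}

static int callid_len(const char *dptr, const char *limit)
{
	int len, domain_len;

	len = word_len(dptr, limit);
	dptr += len;
	if (!len || dptr == limit || *dptr != '@')
		return len;
	dptr++;
	len++;

	domain_len = word_len(dptr, limit);
	if (!domain_len)
		return 0;	/* "word@" with no host part is invalid */
	return len + domain_len;
}

int main(void)
{
	const char *s = "f81d4fae-7dec-11d0-a765-00a0c91e6bf6@foo.bar.com";

	printf("callid_len = %d\n", callid_len(s, s + strlen(s)));
	return 0;
}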
133/* get media type + port length */ 171/* get media type + port length */
134static int media_len(const struct nf_conn *ct, const char *dptr, 172static int media_len(const struct nf_conn *ct, const char *dptr,
135 const char *limit, int *shift) 173 const char *limit, int *shift)
@@ -152,6 +190,9 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
152 const char *end; 190 const char *end;
153 int ret = 0; 191 int ret = 0;
154 192
193 if (!ct)
194 return 0;
195
155 memset(addr, 0, sizeof(*addr)); 196 memset(addr, 0, sizeof(*addr));
156 switch (nf_ct_l3num(ct)) { 197 switch (nf_ct_l3num(ct)) {
157 case AF_INET: 198 case AF_INET:
@@ -296,6 +337,7 @@ static const struct sip_header ct_sip_hdrs[] = {
296 [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len), 337 [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len),
297 [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len), 338 [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len),
298 [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), 339 [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len),
340 [SIP_HDR_CALL_ID] = SIP_HDR("Call-Id", "i", NULL, callid_len),
299}; 341};
300 342
301static const char *sip_follow_continuation(const char *dptr, const char *limit) 343static const char *sip_follow_continuation(const char *dptr, const char *limit)
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index daab8c4a903c..4d87befb04c0 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -18,41 +18,6 @@
18#include <net/udp.h> 18#include <net/udp.h>
19#include <net/netfilter/nf_tproxy_core.h> 19#include <net/netfilter/nf_tproxy_core.h>
20 20
21struct sock *
22nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
23 const __be32 saddr, const __be32 daddr,
24 const __be16 sport, const __be16 dport,
25 const struct net_device *in, bool listening_only)
26{
27 struct sock *sk;
28
29 /* look up socket */
30 switch (protocol) {
31 case IPPROTO_TCP:
32 if (listening_only)
33 sk = __inet_lookup_listener(net, &tcp_hashinfo,
34 daddr, ntohs(dport),
35 in->ifindex);
36 else
37 sk = __inet_lookup(net, &tcp_hashinfo,
38 saddr, sport, daddr, dport,
39 in->ifindex);
40 break;
41 case IPPROTO_UDP:
42 sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
43 in->ifindex);
44 break;
45 default:
46 WARN_ON(1);
47 sk = NULL;
48 }
49
50 pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n",
51 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk);
52
53 return sk;
54}
55EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
56 21
57static void 22static void
58nf_tproxy_destructor(struct sk_buff *skb) 23nf_tproxy_destructor(struct sk_buff *skb)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e34622fa0003..80463507420e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -116,10 +116,8 @@ EXPORT_SYMBOL(xt_register_targets);
116void 116void
117xt_unregister_targets(struct xt_target *target, unsigned int n) 117xt_unregister_targets(struct xt_target *target, unsigned int n)
118{ 118{
119 unsigned int i; 119 while (n-- > 0)
120 120 xt_unregister_target(&target[n]);
121 for (i = 0; i < n; i++)
122 xt_unregister_target(&target[i]);
123} 121}
124EXPORT_SYMBOL(xt_unregister_targets); 122EXPORT_SYMBOL(xt_unregister_targets);
125 123
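Editor's note: the rewritten helpers unregister in reverse registration order, the conventional unwind direction, and the same while (n-- > 0) shape doubles as the rollback for a batch-register helper. A sketch of that pairing, with illustrative names:

/* Sketch of batch register with reverse-order rollback, using the
 * same while (n-- > 0) loop as the rewritten unregister helpers. */
#include <stdio.h>

struct target { const char *name; };

static int  register_one(struct target *t)   { printf("+%s\n", t->name); return 0; }
static void unregister_one(struct target *t) { printf("-%s\n", t->name); }

static void unregister_targets(struct target *t, unsigned int n)
{
	while (n-- > 0)
		unregister_one(&t[n]);
}

static int register_targets(struct target *t, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (register_one(&t[i]) != 0) {
			unregister_targets(t, i);	/* roll back what succeeded */
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct target tg[] = { { "TPROXY.v0" }, { "TPROXY.v1" } };

	if (register_targets(tg, 2) == 0)
		unregister_targets(tg, 2);
	return 0;
}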
@@ -174,10 +172,8 @@ EXPORT_SYMBOL(xt_register_matches);
174void 172void
175xt_unregister_matches(struct xt_match *match, unsigned int n) 173xt_unregister_matches(struct xt_match *match, unsigned int n)
176{ 174{
177 unsigned int i; 175 while (n-- > 0)
178 176 xt_unregister_match(&match[n]);
179 for (i = 0; i < n; i++)
180 xt_unregister_match(&match[i]);
181} 177}
182EXPORT_SYMBOL(xt_unregister_matches); 178EXPORT_SYMBOL(xt_unregister_matches);
183 179
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index c61294d85fda..640678f47a2a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Transparent proxy support for Linux/iptables 2 * Transparent proxy support for Linux/iptables
3 * 3 *
4 * Copyright (c) 2006-2007 BalaBit IT Ltd. 4 * Copyright (c) 2006-2010 BalaBit IT Ltd.
5 * Author: Balazs Scheidler, Krisztian Kovacs 5 * Author: Balazs Scheidler, Krisztian Kovacs
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
@@ -16,19 +16,98 @@
16#include <net/checksum.h> 16#include <net/checksum.h>
17#include <net/udp.h> 17#include <net/udp.h>
18#include <net/inet_sock.h> 18#include <net/inet_sock.h>
19 19#include <linux/inetdevice.h>
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter_ipv4/ip_tables.h> 21#include <linux/netfilter_ipv4/ip_tables.h>
22#include <linux/netfilter/xt_TPROXY.h>
23 22
24#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24
25#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
26#define XT_TPROXY_HAVE_IPV6 1
27#include <net/if_inet6.h>
28#include <net/addrconf.h>
29#include <linux/netfilter_ipv6/ip6_tables.h>
30#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
31#endif
32
25#include <net/netfilter/nf_tproxy_core.h> 33#include <net/netfilter/nf_tproxy_core.h>
34#include <linux/netfilter/xt_TPROXY.h>
35
36static inline __be32
37tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
38{
39 struct in_device *indev;
40 __be32 laddr;
41
42 if (user_laddr)
43 return user_laddr;
44
45 laddr = 0;
46 rcu_read_lock();
47 indev = __in_dev_get_rcu(skb->dev);
48 for_primary_ifa(indev) {
49 laddr = ifa->ifa_local;
50 break;
51 } endfor_ifa(indev);
52 rcu_read_unlock();
53
54 return laddr ? laddr : daddr;
55}
56
57/**
58 * tproxy_handle_time_wait4() - handle IPv4 TCP TIME_WAIT reopen redirections
59 * @skb: The skb being processed.
60 * @laddr: IPv4 address to redirect to or zero.
61 * @lport: TCP port to redirect to or zero.
62 * @sk: The TIME_WAIT TCP socket found by the lookup.
63 *
64 * We have to handle SYN packets arriving to TIME_WAIT sockets
65 * differently: instead of reopening the connection we should rather
66 * redirect the new connection to the proxy if there's a listener
67 * socket present.
68 *
69 * tproxy_handle_time_wait4() consumes the socket reference passed in.
70 *
71 * Returns the listener socket if there's one, the TIME_WAIT socket if
72 * no such listener is found, or NULL if the TCP header is incomplete.
73 */
74static struct sock *
75tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
76 struct sock *sk)
77{
78 const struct iphdr *iph = ip_hdr(skb);
79 struct tcphdr _hdr, *hp;
80
81 hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
82 if (hp == NULL) {
83 inet_twsk_put(inet_twsk(sk));
84 return NULL;
85 }
86
87 if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
88 /* SYN to a TIME_WAIT socket, we'd rather redirect it
89 * to a listener socket if there's one */
90 struct sock *sk2;
91
92 sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
93 iph->saddr, laddr ? laddr : iph->daddr,
94 hp->source, lport ? lport : hp->dest,
95 skb->dev, NFT_LOOKUP_LISTENER);
96 if (sk2) {
97 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
98 inet_twsk_put(inet_twsk(sk));
99 sk = sk2;
100 }
101 }
102
103 return sk;
104}
26 105
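Editor's note: a client reopening a connection still in TIME_WAIT shows up as a bare SYN, which is exactly what the syn && !rst && !ack && !fin test above isolates before the listener lookup. A tiny demo of the predicate on a toy flags struct (not struct tcphdr):

/* Demo of the "bare SYN" test used to spot TIME_WAIT reopens. */
#include <stdio.h>

struct tcp_flags { unsigned syn:1, rst:1, ack:1, fin:1; };

static int is_reopen_syn(const struct tcp_flags *hp)
{
	return hp->syn && !hp->rst && !hp->ack && !hp->fin;
}

int main(void)
{
	struct tcp_flags bare_syn = { .syn = 1 };
	struct tcp_flags syn_ack  = { .syn = 1, .ack = 1 };

	printf("bare SYN -> %d, SYN/ACK -> %d\n",
	       is_reopen_syn(&bare_syn), is_reopen_syn(&syn_ack));
	return 0;
}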
27static unsigned int 106static unsigned int
28tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) 107tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
108 u_int32_t mark_mask, u_int32_t mark_value)
29{ 109{
30 const struct iphdr *iph = ip_hdr(skb); 110 const struct iphdr *iph = ip_hdr(skb);
31 const struct xt_tproxy_target_info *tgi = par->targinfo;
32 struct udphdr _hdr, *hp; 111 struct udphdr _hdr, *hp;
33 struct sock *sk; 112 struct sock *sk;
34 113
@@ -36,12 +115,195 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
36 if (hp == NULL) 115 if (hp == NULL)
37 return NF_DROP; 116 return NF_DROP;
38 117
118 /* check if there's an ongoing connection on the packet
119 * addresses, this happens if the redirect already happened
120 * and the current packet belongs to an already established
121 * connection */
39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 122 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
40 iph->saddr, 123 iph->saddr, iph->daddr,
41 tgi->laddr ? tgi->laddr : iph->daddr, 124 hp->source, hp->dest,
42 hp->source, 125 skb->dev, NFT_LOOKUP_ESTABLISHED);
43 tgi->lport ? tgi->lport : hp->dest, 126
44 par->in, true); 127 laddr = tproxy_laddr4(skb, laddr, iph->daddr);
128 if (!lport)
129 lport = hp->dest;
130
131 /* UDP has no TCP_TIME_WAIT state, so we never enter here */
132 if (sk && sk->sk_state == TCP_TIME_WAIT)
133 /* reopening a TIME_WAIT connection needs special handling */
134 sk = tproxy_handle_time_wait4(skb, laddr, lport, sk);
135 else if (!sk)
136 /* no, there's no established connection, check if
137 * there's a listener on the redirected addr/port */
138 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
139 iph->saddr, laddr,
140 hp->source, lport,
141 skb->dev, NFT_LOOKUP_LISTENER);
142
143 /* NOTE: assign_sock consumes our sk reference */
144 if (sk && nf_tproxy_assign_sock(skb, sk)) {
145 /* This should be in a separate target, but we don't do multiple
146 targets on the same rule yet */
147 skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
148
149 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
150 iph->protocol, &iph->daddr, ntohs(hp->dest),
151 &laddr, ntohs(lport), skb->mark);
152 return NF_ACCEPT;
153 }
154
155 pr_debug("no socket, dropping: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
156 iph->protocol, &iph->saddr, ntohs(hp->source),
157 &iph->daddr, ntohs(hp->dest), skb->mark);
158 return NF_DROP;
159}
160
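Editor's note: both target revisions funnel into the same masked mark update, (mark & ~mask) ^ value: the masked bits are cleared and the configured value XORed in, leaving the rest of the mark intact. A two-line demo:

/* Demo of the masked mark update: clear the masked bits, then XOR in
 * the configured value; all other bits are preserved. */
#include <stdint.h>
#include <stdio.h>

static uint32_t apply_mark(uint32_t mark, uint32_t mask, uint32_t value)
{
	return (mark & ~mask) ^ value;
}

int main(void)
{
	uint32_t mark = 0xdead00ffu;

	/* rewrite only the low byte to 0x01: prints dead0001 */
	printf("%08x\n", apply_mark(mark, 0x000000ffu, 0x00000001u));
	return 0;
}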
161static unsigned int
162tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
163{
164 const struct xt_tproxy_target_info *tgi = par->targinfo;
165
166 return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
167}
168
169static unsigned int
170tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
171{
172 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
173
174 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
175}
176
177#ifdef XT_TPROXY_HAVE_IPV6
178
179static inline const struct in6_addr *
180tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
181 const struct in6_addr *daddr)
182{
183 struct inet6_dev *indev;
184 struct inet6_ifaddr *ifa;
185 struct in6_addr *laddr;
186
187 if (!ipv6_addr_any(user_laddr))
188 return user_laddr;
189 laddr = NULL;
190
191 rcu_read_lock();
192 indev = __in6_dev_get(skb->dev);
193 if (indev)
194 list_for_each_entry(ifa, &indev->addr_list, if_list) {
195 if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
196 continue;
197
198 laddr = &ifa->addr;
199 break;
200 }
201 rcu_read_unlock();
202
203 return laddr ? laddr : daddr;
204}
205
206/**
207 * tproxy_handle_time_wait6() - handle IPv6 TCP TIME_WAIT reopen redirections
208 * @skb: The skb being processed.
209 * @tproto: Transport protocol.
210 * @thoff: Transport protocol header offset.
211 * @par: Iptables target parameters.
212 * @sk: The TIME_WAIT TCP socket found by the lookup.
213 *
214 * We have to handle SYN packets arriving to TIME_WAIT sockets
215 * differently: instead of reopening the connection we should rather
216 * redirect the new connection to the proxy if there's a listener
217 * socket present.
218 *
219 * tproxy_handle_time_wait6() consumes the socket reference passed in.
220 *
221 * Returns the listener socket if there's one, the TIME_WAIT socket if
222 * no such listener is found, or NULL if the TCP header is incomplete.
223 */
224static struct sock *
225tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
226 const struct xt_action_param *par,
227 struct sock *sk)
228{
229 const struct ipv6hdr *iph = ipv6_hdr(skb);
230 struct tcphdr _hdr, *hp;
231 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
232
233 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
234 if (hp == NULL) {
235 inet_twsk_put(inet_twsk(sk));
236 return NULL;
237 }
238
239 if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
240 /* SYN to a TIME_WAIT socket, we'd rather redirect it
241 * to a listener socket if there's one */
242 struct sock *sk2;
243
244 sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
245 &iph->saddr,
246 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
247 hp->source,
248 tgi->lport ? tgi->lport : hp->dest,
249 skb->dev, NFT_LOOKUP_LISTENER);
250 if (sk2) {
251 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
252 inet_twsk_put(inet_twsk(sk));
253 sk = sk2;
254 }
255 }
256
257 return sk;
258}
259
260static unsigned int
261tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
262{
263 const struct ipv6hdr *iph = ipv6_hdr(skb);
264 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
265 struct udphdr _hdr, *hp;
266 struct sock *sk;
267 const struct in6_addr *laddr;
268 __be16 lport;
269 int thoff;
270 int tproto;
271
272 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
273 if (tproto < 0) {
274 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
275 return NF_DROP;
276 }
277
278 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
279 if (hp == NULL) {
280 pr_debug("unable to grab transport header contents in IPv6 packet, dropping\n");
281 return NF_DROP;
282 }
283
284 /* check if there's an ongoing connection on the packet
285 * addresses, this happens if the redirect already happened
286 * and the current packet belongs to an already established
287 * connection */
288 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
289 &iph->saddr, &iph->daddr,
290 hp->source, hp->dest,
291 par->in, NFT_LOOKUP_ESTABLISHED);
292
293 laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
294 lport = tgi->lport ? tgi->lport : hp->dest;
295
296 /* UDP has no TCP_TIME_WAIT state, so we never enter here */
297 if (sk && sk->sk_state == TCP_TIME_WAIT)
298 /* reopening a TIME_WAIT connection needs special handling */
299 sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk);
300 else if (!sk)
 301 /* no, there's no established connection, check if
302 * there's a listener on the redirected addr/port */
303 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
304 &iph->saddr, laddr,
305 hp->source, lport,
306 par->in, NFT_LOOKUP_LISTENER);
45 307
46 /* NOTE: assign_sock consumes our sk reference */ 308 /* NOTE: assign_sock consumes our sk reference */
47 if (sk && nf_tproxy_assign_sock(skb, sk)) { 309 if (sk && nf_tproxy_assign_sock(skb, sk)) {
@@ -49,19 +311,34 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
49 targets on the same rule yet */ 311 targets on the same rule yet */
50 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; 312 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
51 313
52 pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n", 314 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
53 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), 315 tproto, &iph->saddr, ntohs(hp->source),
54 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); 316 laddr, ntohs(lport), skb->mark);
55 return NF_ACCEPT; 317 return NF_ACCEPT;
56 } 318 }
57 319
58 pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n", 320 pr_debug("no socket, dropping: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
59 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), 321 tproto, &iph->saddr, ntohs(hp->source),
60 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); 322 &iph->daddr, ntohs(hp->dest), skb->mark);
323
61 return NF_DROP; 324 return NF_DROP;
62} 325}
63 326
64static int tproxy_tg_check(const struct xt_tgchk_param *par) 327static int tproxy_tg6_check(const struct xt_tgchk_param *par)
328{
329 const struct ip6t_ip6 *i = par->entryinfo;
330
331 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
332 && !(i->flags & IP6T_INV_PROTO))
333 return 0;
334
335 pr_info("Can be used only in combination with "
336 "either -p tcp or -p udp\n");
337 return -EINVAL;
338}
339#endif
340
341static int tproxy_tg4_check(const struct xt_tgchk_param *par)
65{ 342{
66 const struct ipt_ip *i = par->entryinfo; 343 const struct ipt_ip *i = par->entryinfo;
67 344
@@ -74,31 +351,64 @@ static int tproxy_tg_check(const struct xt_tgchk_param *par)
74 return -EINVAL; 351 return -EINVAL;
75} 352}
76 353
77static struct xt_target tproxy_tg_reg __read_mostly = { 354static struct xt_target tproxy_tg_reg[] __read_mostly = {
78 .name = "TPROXY", 355 {
79 .family = AF_INET, 356 .name = "TPROXY",
80 .table = "mangle", 357 .family = NFPROTO_IPV4,
81 .target = tproxy_tg, 358 .table = "mangle",
82 .targetsize = sizeof(struct xt_tproxy_target_info), 359 .target = tproxy_tg4_v0,
83 .checkentry = tproxy_tg_check, 360 .revision = 0,
84 .hooks = 1 << NF_INET_PRE_ROUTING, 361 .targetsize = sizeof(struct xt_tproxy_target_info),
85 .me = THIS_MODULE, 362 .checkentry = tproxy_tg4_check,
363 .hooks = 1 << NF_INET_PRE_ROUTING,
364 .me = THIS_MODULE,
365 },
366 {
367 .name = "TPROXY",
368 .family = NFPROTO_IPV4,
369 .table = "mangle",
370 .target = tproxy_tg4_v1,
371 .revision = 1,
372 .targetsize = sizeof(struct xt_tproxy_target_info_v1),
373 .checkentry = tproxy_tg4_check,
374 .hooks = 1 << NF_INET_PRE_ROUTING,
375 .me = THIS_MODULE,
376 },
377#ifdef XT_TPROXY_HAVE_IPV6
378 {
379 .name = "TPROXY",
380 .family = NFPROTO_IPV6,
381 .table = "mangle",
382 .target = tproxy_tg6_v1,
383 .revision = 1,
384 .targetsize = sizeof(struct xt_tproxy_target_info_v1),
385 .checkentry = tproxy_tg6_check,
386 .hooks = 1 << NF_INET_PRE_ROUTING,
387 .me = THIS_MODULE,
388 },
389#endif
390
86}; 391};
87 392
88static int __init tproxy_tg_init(void) 393static int __init tproxy_tg_init(void)
89{ 394{
90 nf_defrag_ipv4_enable(); 395 nf_defrag_ipv4_enable();
91 return xt_register_target(&tproxy_tg_reg); 396#ifdef XT_TPROXY_HAVE_IPV6
397 nf_defrag_ipv6_enable();
398#endif
399
400 return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
92} 401}
93 402
94static void __exit tproxy_tg_exit(void) 403static void __exit tproxy_tg_exit(void)
95{ 404{
96 xt_unregister_target(&tproxy_tg_reg); 405 xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
97} 406}
98 407
99module_init(tproxy_tg_init); 408module_init(tproxy_tg_init);
100module_exit(tproxy_tg_exit); 409module_exit(tproxy_tg_exit);
101MODULE_LICENSE("GPL"); 410MODULE_LICENSE("GPL");
102MODULE_AUTHOR("Krisztian Kovacs"); 411MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
103MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); 412MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
104MODULE_ALIAS("ipt_TPROXY"); 413MODULE_ALIAS("ipt_TPROXY");
414MODULE_ALIAS("ip6t_TPROXY");
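
The masked-XOR mark update above ("skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value") mirrors the MARK target's set-xor semantics: bits outside mark_mask survive, the rest are toggled by mark_value. A standalone sketch of just that expression (editor's illustration, not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical helper mirroring skb->mark = (mark & ~mask) ^ value */
	static uint32_t tproxy_update_mark(uint32_t mark, uint32_t mask, uint32_t value)
	{
		return (mark & ~mask) ^ value;	/* keep bits outside mask, XOR in value */
	}

	int main(void)
	{
		/* 0xdeadbeef with the low byte masked off, then 0x12 XORed in */
		printf("%#x\n", tproxy_update_mark(0xdeadbeef, 0xff, 0x12));
		return 0;	/* prints 0xdeadbe12 */
	}
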
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b46a8390896d..9228ee0dc11a 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -448,6 +448,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
448{ 448{
449 __be16 _ports[2], *ports; 449 __be16 _ports[2], *ports;
450 u8 nexthdr; 450 u8 nexthdr;
451 int poff;
451 452
452 memset(dst, 0, sizeof(*dst)); 453 memset(dst, 0, sizeof(*dst));
453 454
@@ -492,19 +493,13 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
492 return 0; 493 return 0;
493 } 494 }
494 495
495 switch (nexthdr) { 496 poff = proto_ports_offset(nexthdr);
496 case IPPROTO_TCP: 497 if (poff >= 0) {
497 case IPPROTO_UDP: 498 ports = skb_header_pointer(skb, protoff + poff, sizeof(_ports),
498 case IPPROTO_UDPLITE:
499 case IPPROTO_SCTP:
500 case IPPROTO_DCCP:
501 ports = skb_header_pointer(skb, protoff, sizeof(_ports),
502 &_ports); 499 &_ports);
503 break; 500 } else {
504 default:
505 _ports[0] = _ports[1] = 0; 501 _ports[0] = _ports[1] = 0;
506 ports = _ports; 502 ports = _ports;
507 break;
508 } 503 }
509 if (!ports) 504 if (!ports)
510 return -1; 505 return -1;
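
The hashlimit hunk above replaces a per-protocol switch with the shared proto_ports_offset() helper, which returns the byte offset of the port pair inside the transport header, or a negative value for portless protocols. A minimal userspace sketch of the idea (the in-tree helper additionally treats ESP/AH SPIs as port-like, which is omitted here):

	#include <netinet/in.h>	/* IPPROTO_* */
	#include <stdio.h>

	#ifndef IPPROTO_UDPLITE
	#define IPPROTO_UDPLITE 136
	#endif
	#ifndef IPPROTO_DCCP
	#define IPPROTO_DCCP 33
	#endif

	/* Sketch of a proto_ports_offset()-style helper: offset of the
	 * source/destination port pair within the transport header, or -1
	 * if the protocol carries no ports. */
	static int ports_offset(int protocol)
	{
		switch (protocol) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_UDPLITE:
		case IPPROTO_SCTP:
		case IPPROTO_DCCP:
			return 0;	/* ports are the first four bytes */
		default:
			return -1;
		}
	}

	int main(void)
	{
		printf("tcp: %d, icmp: %d\n",
		       ports_offset(IPPROTO_TCP), ports_offset(IPPROTO_ICMP));
		return 0;
	}
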
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 7a4d66db95ae..9127a3d8aa35 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -16,7 +16,6 @@
16#include <linux/ip_vs.h> 16#include <linux/ip_vs.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter/xt_ipvs.h> 19#include <linux/netfilter/xt_ipvs.h>
21#include <net/netfilter/nf_conntrack.h> 20#include <net/netfilter/nf_conntrack.h>
22 21
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 76aec6a44762..d2ff15a2412b 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -567,6 +567,7 @@ static const struct file_operations recent_mt_fops = {
567 .write = recent_mt_proc_write, 567 .write = recent_mt_proc_write,
568 .release = seq_release_private, 568 .release = seq_release_private,
569 .owner = THIS_MODULE, 569 .owner = THIS_MODULE,
570 .llseek = seq_lseek,
570}; 571};
571 572
572static int __net_init recent_proc_net_init(struct net *net) 573static int __net_init recent_proc_net_init(struct net *net)
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1ca89908cbad..00d6ae838303 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,6 +22,12 @@
22#include <net/netfilter/nf_tproxy_core.h> 22#include <net/netfilter/nf_tproxy_core.h>
23#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24 24
25#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
26#define XT_SOCKET_HAVE_IPV6 1
27#include <linux/netfilter_ipv6/ip6_tables.h>
28#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29#endif
30
25#include <linux/netfilter/xt_socket.h> 31#include <linux/netfilter/xt_socket.h>
26 32
27#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 33#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -30,7 +36,7 @@
30#endif 36#endif
31 37
32static int 38static int
33extract_icmp_fields(const struct sk_buff *skb, 39extract_icmp4_fields(const struct sk_buff *skb,
34 u8 *protocol, 40 u8 *protocol,
35 __be32 *raddr, 41 __be32 *raddr,
36 __be32 *laddr, 42 __be32 *laddr,
@@ -86,7 +92,6 @@ extract_icmp_fields(const struct sk_buff *skb,
86 return 0; 92 return 0;
87} 93}
88 94
89
90static bool 95static bool
91socket_match(const struct sk_buff *skb, struct xt_action_param *par, 96socket_match(const struct sk_buff *skb, struct xt_action_param *par,
92 const struct xt_socket_mtinfo1 *info) 97 const struct xt_socket_mtinfo1 *info)
@@ -115,7 +120,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
115 dport = hp->dest; 120 dport = hp->dest;
116 121
117 } else if (iph->protocol == IPPROTO_ICMP) { 122 } else if (iph->protocol == IPPROTO_ICMP) {
118 if (extract_icmp_fields(skb, &protocol, &saddr, &daddr, 123 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
119 &sport, &dport)) 124 &sport, &dport))
120 return false; 125 return false;
121 } else { 126 } else {
@@ -142,7 +147,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
142#endif 147#endif
143 148
144 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, 149 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
145 saddr, daddr, sport, dport, par->in, false); 150 saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
146 if (sk != NULL) { 151 if (sk != NULL) {
147 bool wildcard; 152 bool wildcard;
148 bool transparent = true; 153 bool transparent = true;
@@ -165,32 +170,156 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
165 sk = NULL; 170 sk = NULL;
166 } 171 }
167 172
168 pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n", 173 pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
169 protocol, ntohl(saddr), ntohs(sport), 174 protocol, &saddr, ntohs(sport),
170 ntohl(daddr), ntohs(dport), 175 &daddr, ntohs(dport),
171 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); 176 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
172 177
173 return (sk != NULL); 178 return (sk != NULL);
174} 179}
175 180
176static bool 181static bool
177socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) 182socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
178{ 183{
179 return socket_match(skb, par, NULL); 184 return socket_match(skb, par, NULL);
180} 185}
181 186
182static bool 187static bool
183socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) 188socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
184{ 189{
185 return socket_match(skb, par, par->matchinfo); 190 return socket_match(skb, par, par->matchinfo);
186} 191}
187 192
193#ifdef XT_SOCKET_HAVE_IPV6
194
195static int
196extract_icmp6_fields(const struct sk_buff *skb,
197 unsigned int outside_hdrlen,
198 int *protocol,
199 struct in6_addr **raddr,
200 struct in6_addr **laddr,
201 __be16 *rport,
202 __be16 *lport)
203{
204 struct ipv6hdr *inside_iph, _inside_iph;
205 struct icmp6hdr *icmph, _icmph;
206 __be16 *ports, _ports[2];
207 u8 inside_nexthdr;
208 int inside_hdrlen;
209
210 icmph = skb_header_pointer(skb, outside_hdrlen,
211 sizeof(_icmph), &_icmph);
212 if (icmph == NULL)
213 return 1;
214
215 if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
216 return 1;
217
218 inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph);
219 if (inside_iph == NULL)
220 return 1;
221 inside_nexthdr = inside_iph->nexthdr;
222
223 inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr);
224 if (inside_hdrlen < 0)
225 return 1; /* hjm: Packet has no/incomplete transport layer headers. */
226
227 if (inside_nexthdr != IPPROTO_TCP &&
228 inside_nexthdr != IPPROTO_UDP)
229 return 1;
230
231 ports = skb_header_pointer(skb, inside_hdrlen,
232 sizeof(_ports), &_ports);
233 if (ports == NULL)
234 return 1;
235
236 /* the inside IP packet is the one quoted from our side, thus
237 * its saddr is the local address */
238 *protocol = inside_nexthdr;
239 *laddr = &inside_iph->saddr;
240 *lport = ports[0];
241 *raddr = &inside_iph->daddr;
242 *rport = ports[1];
243
244 return 0;
245}
246
247static bool
248socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
249{
250 struct ipv6hdr *iph = ipv6_hdr(skb);
251 struct udphdr _hdr, *hp = NULL;
252 struct sock *sk;
253 struct in6_addr *daddr, *saddr;
254 __be16 dport, sport;
255 int thoff, tproto;
256 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
257
258 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
259 if (tproto < 0) {
260 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
261 return NF_DROP;
262 }
263
264 if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
265 hp = skb_header_pointer(skb, thoff,
266 sizeof(_hdr), &_hdr);
267 if (hp == NULL)
268 return false;
269
270 saddr = &iph->saddr;
271 sport = hp->source;
272 daddr = &iph->daddr;
273 dport = hp->dest;
274
275 } else if (tproto == IPPROTO_ICMPV6) {
276 if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
277 &sport, &dport))
278 return false;
279 } else {
280 return false;
281 }
282
283 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
284 saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
285 if (sk != NULL) {
286 bool wildcard;
287 bool transparent = true;
288
289 /* Ignore sockets listening on INADDR_ANY */
290 wildcard = (sk->sk_state != TCP_TIME_WAIT &&
291 ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
292
293		/* Ignore non-transparent sockets
294		 if XT_SOCKET_TRANSPARENT is used */
295 if (info && info->flags & XT_SOCKET_TRANSPARENT)
296 transparent = ((sk->sk_state != TCP_TIME_WAIT &&
297 inet_sk(sk)->transparent) ||
298 (sk->sk_state == TCP_TIME_WAIT &&
299 inet_twsk(sk)->tw_transparent));
300
301 nf_tproxy_put_sock(sk);
302
303 if (wildcard || !transparent)
304 sk = NULL;
305 }
306
307 pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
308 "(orig %pI6:%hu) sock %p\n",
309 tproto, saddr, ntohs(sport),
310 daddr, ntohs(dport),
311 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
312
313 return (sk != NULL);
314}
315#endif
316
188static struct xt_match socket_mt_reg[] __read_mostly = { 317static struct xt_match socket_mt_reg[] __read_mostly = {
189 { 318 {
190 .name = "socket", 319 .name = "socket",
191 .revision = 0, 320 .revision = 0,
192 .family = NFPROTO_IPV4, 321 .family = NFPROTO_IPV4,
193 .match = socket_mt_v0, 322 .match = socket_mt4_v0,
194 .hooks = (1 << NF_INET_PRE_ROUTING) | 323 .hooks = (1 << NF_INET_PRE_ROUTING) |
195 (1 << NF_INET_LOCAL_IN), 324 (1 << NF_INET_LOCAL_IN),
196 .me = THIS_MODULE, 325 .me = THIS_MODULE,
@@ -199,17 +328,33 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
199 .name = "socket", 328 .name = "socket",
200 .revision = 1, 329 .revision = 1,
201 .family = NFPROTO_IPV4, 330 .family = NFPROTO_IPV4,
202 .match = socket_mt_v1, 331 .match = socket_mt4_v1,
203 .matchsize = sizeof(struct xt_socket_mtinfo1), 332 .matchsize = sizeof(struct xt_socket_mtinfo1),
204 .hooks = (1 << NF_INET_PRE_ROUTING) | 333 .hooks = (1 << NF_INET_PRE_ROUTING) |
205 (1 << NF_INET_LOCAL_IN), 334 (1 << NF_INET_LOCAL_IN),
206 .me = THIS_MODULE, 335 .me = THIS_MODULE,
207 }, 336 },
337#ifdef XT_SOCKET_HAVE_IPV6
338 {
339 .name = "socket",
340 .revision = 1,
341 .family = NFPROTO_IPV6,
342 .match = socket_mt6_v1,
343 .matchsize = sizeof(struct xt_socket_mtinfo1),
344 .hooks = (1 << NF_INET_PRE_ROUTING) |
345 (1 << NF_INET_LOCAL_IN),
346 .me = THIS_MODULE,
347 },
348#endif
208}; 349};
209 350
210static int __init socket_mt_init(void) 351static int __init socket_mt_init(void)
211{ 352{
212 nf_defrag_ipv4_enable(); 353 nf_defrag_ipv4_enable();
354#ifdef XT_SOCKET_HAVE_IPV6
355 nf_defrag_ipv6_enable();
356#endif
357
213 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); 358 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
214} 359}
215 360
@@ -225,3 +370,4 @@ MODULE_LICENSE("GPL");
225MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); 370MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
226MODULE_DESCRIPTION("x_tables socket match module"); 371MODULE_DESCRIPTION("x_tables socket match module");
227MODULE_ALIAS("ipt_socket"); 372MODULE_ALIAS("ipt_socket");
373MODULE_ALIAS("ip6t_socket");
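
extract_icmp6_fields() above relies on the packet quoted inside an ICMPv6 error being one we sent: the inner source is therefore the local endpoint and the inner destination the remote one. That direction swap, isolated into a standalone sketch (struct and function names here are hypothetical):

	#include <netinet/in.h>
	#include <stdint.h>
	#include <stdio.h>

	struct endpoints {
		struct in6_addr laddr, raddr;
		uint16_t lport, rport;	/* network byte order in the real code */
	};

	/* The packet quoted in an ICMPv6 error is one *we* sent, so its
	 * saddr/source port are the local endpoint and its daddr/dest port
	 * the remote one. */
	static void map_quoted_packet(const struct in6_addr *inner_saddr,
				      const struct in6_addr *inner_daddr,
				      uint16_t inner_sport, uint16_t inner_dport,
				      struct endpoints *ep)
	{
		ep->laddr = *inner_saddr;
		ep->lport = inner_sport;
		ep->raddr = *inner_daddr;
		ep->rport = inner_dport;
	}

	int main(void)
	{
		struct in6_addr s = IN6ADDR_LOOPBACK_INIT, d = IN6ADDR_ANY_INIT;
		struct endpoints ep;

		map_quoted_packet(&s, &d, 1234, 80, &ep);
		printf("local port %u, remote port %u\n", ep.lport, ep.rport);
		return 0;
	}
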
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index cd96ed3ccee4..478181d53c55 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -83,9 +83,9 @@ struct netlink_sock {
83 struct module *module; 83 struct module *module;
84}; 84};
85 85
86struct listeners_rcu_head { 86struct listeners {
87 struct rcu_head rcu_head; 87 struct rcu_head rcu;
88 void *ptr; 88 unsigned long masks[0];
89}; 89};
90 90
91#define NETLINK_KERNEL_SOCKET 0x1 91#define NETLINK_KERNEL_SOCKET 0x1
@@ -119,7 +119,7 @@ struct nl_pid_hash {
119struct netlink_table { 119struct netlink_table {
120 struct nl_pid_hash hash; 120 struct nl_pid_hash hash;
121 struct hlist_head mc_list; 121 struct hlist_head mc_list;
122 unsigned long *listeners; 122 struct listeners __rcu *listeners;
123 unsigned int nl_nonroot; 123 unsigned int nl_nonroot;
124 unsigned int groups; 124 unsigned int groups;
125 struct mutex *cb_mutex; 125 struct mutex *cb_mutex;
@@ -338,7 +338,7 @@ netlink_update_listeners(struct sock *sk)
338 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups)) 338 if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
339 mask |= nlk_sk(sk)->groups[i]; 339 mask |= nlk_sk(sk)->groups[i];
340 } 340 }
341 tbl->listeners[i] = mask; 341 tbl->listeners->masks[i] = mask;
342 } 342 }
343 /* this function is only called with the netlink table "grabbed", which 343 /* this function is only called with the netlink table "grabbed", which
344 * makes sure updates are visible before bind or setsockopt return. */ 344 * makes sure updates are visible before bind or setsockopt return. */
@@ -936,7 +936,7 @@ EXPORT_SYMBOL(netlink_unicast);
936int netlink_has_listeners(struct sock *sk, unsigned int group) 936int netlink_has_listeners(struct sock *sk, unsigned int group)
937{ 937{
938 int res = 0; 938 int res = 0;
939 unsigned long *listeners; 939 struct listeners *listeners;
940 940
941 BUG_ON(!netlink_is_kernel(sk)); 941 BUG_ON(!netlink_is_kernel(sk));
942 942
@@ -944,7 +944,7 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
944 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners); 944 listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
945 945
946 if (group - 1 < nl_table[sk->sk_protocol].groups) 946 if (group - 1 < nl_table[sk->sk_protocol].groups)
947 res = test_bit(group - 1, listeners); 947 res = test_bit(group - 1, listeners->masks);
948 948
949 rcu_read_unlock(); 949 rcu_read_unlock();
950 950
@@ -1498,7 +1498,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1498 struct socket *sock; 1498 struct socket *sock;
1499 struct sock *sk; 1499 struct sock *sk;
1500 struct netlink_sock *nlk; 1500 struct netlink_sock *nlk;
1501 unsigned long *listeners = NULL; 1501 struct listeners *listeners = NULL;
1502 1502
1503 BUG_ON(!nl_table); 1503 BUG_ON(!nl_table);
1504 1504
@@ -1523,8 +1523,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1523 if (groups < 32) 1523 if (groups < 32)
1524 groups = 32; 1524 groups = 32;
1525 1525
1526 listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), 1526 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1527 GFP_KERNEL);
1528 if (!listeners) 1527 if (!listeners)
1529 goto out_sock_release; 1528 goto out_sock_release;
1530 1529
@@ -1541,7 +1540,7 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1541 netlink_table_grab(); 1540 netlink_table_grab();
1542 if (!nl_table[unit].registered) { 1541 if (!nl_table[unit].registered) {
1543 nl_table[unit].groups = groups; 1542 nl_table[unit].groups = groups;
1544 nl_table[unit].listeners = listeners; 1543 rcu_assign_pointer(nl_table[unit].listeners, listeners);
1545 nl_table[unit].cb_mutex = cb_mutex; 1544 nl_table[unit].cb_mutex = cb_mutex;
1546 nl_table[unit].module = module; 1545 nl_table[unit].module = module;
1547 nl_table[unit].registered = 1; 1546 nl_table[unit].registered = 1;
@@ -1572,43 +1571,28 @@ netlink_kernel_release(struct sock *sk)
1572EXPORT_SYMBOL(netlink_kernel_release); 1571EXPORT_SYMBOL(netlink_kernel_release);
1573 1572
1574 1573
1575static void netlink_free_old_listeners(struct rcu_head *rcu_head) 1574static void listeners_free_rcu(struct rcu_head *head)
1576{ 1575{
1577 struct listeners_rcu_head *lrh; 1576 kfree(container_of(head, struct listeners, rcu));
1578
1579 lrh = container_of(rcu_head, struct listeners_rcu_head, rcu_head);
1580 kfree(lrh->ptr);
1581} 1577}
1582 1578
1583int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1579int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1584{ 1580{
1585 unsigned long *listeners, *old = NULL; 1581 struct listeners *new, *old;
1586 struct listeners_rcu_head *old_rcu_head;
1587 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1582 struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1588 1583
1589 if (groups < 32) 1584 if (groups < 32)
1590 groups = 32; 1585 groups = 32;
1591 1586
1592 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1587 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1593 listeners = kzalloc(NLGRPSZ(groups) + 1588 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1594 sizeof(struct listeners_rcu_head), 1589 if (!new)
1595 GFP_ATOMIC);
1596 if (!listeners)
1597 return -ENOMEM; 1590 return -ENOMEM;
1598 old = tbl->listeners; 1591 old = rcu_dereference_raw(tbl->listeners);
1599 memcpy(listeners, old, NLGRPSZ(tbl->groups)); 1592 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1600 rcu_assign_pointer(tbl->listeners, listeners); 1593 rcu_assign_pointer(tbl->listeners, new);
1601 /* 1594
1602 * Free the old memory after an RCU grace period so we 1595 call_rcu(&old->rcu, listeners_free_rcu);
1603 * don't leak it. We use call_rcu() here in order to be
1604 * able to call this function from atomic contexts. The
1605 * allocation of this memory will have reserved enough
1606 * space for struct listeners_rcu_head at the end.
1607 */
1608 old_rcu_head = (void *)(tbl->listeners +
1609 NLGRPLONGS(tbl->groups));
1610 old_rcu_head->ptr = old;
1611 call_rcu(&old_rcu_head->rcu_head, netlink_free_old_listeners);
1612 } 1596 }
1613 tbl->groups = groups; 1597 tbl->groups = groups;
1614 1598
@@ -2104,18 +2088,17 @@ static void __net_exit netlink_net_exit(struct net *net)
2104 2088
2105static void __init netlink_add_usersock_entry(void) 2089static void __init netlink_add_usersock_entry(void)
2106{ 2090{
2107 unsigned long *listeners; 2091 struct listeners *listeners;
2108 int groups = 32; 2092 int groups = 32;
2109 2093
2110 listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), 2094 listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2111 GFP_KERNEL);
2112 if (!listeners) 2095 if (!listeners)
2113 panic("netlink_add_usersock_entry: Cannot allocate listneres\n"); 2096 panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2114 2097
2115 netlink_table_grab(); 2098 netlink_table_grab();
2116 2099
2117 nl_table[NETLINK_USERSOCK].groups = groups; 2100 nl_table[NETLINK_USERSOCK].groups = groups;
2118 nl_table[NETLINK_USERSOCK].listeners = listeners; 2101 rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2119 nl_table[NETLINK_USERSOCK].module = THIS_MODULE; 2102 nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2120 nl_table[NETLINK_USERSOCK].registered = 1; 2103 nl_table[NETLINK_USERSOCK].registered = 1;
2121 2104
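
The netlink conversion above replaces a listeners_rcu_head bolted onto the end of the bitmap with the conventional layout: rcu_head first, bitmap as a flexible array member, freed via container_of() from the RCU callback. A userspace model of the layout and free path (no real RCU; the callback is invoked directly for illustration):

	#include <stddef.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct rcu_head {			/* stand-in for the kernel type */
		void (*func)(struct rcu_head *);
	};

	struct listeners {
		struct rcu_head rcu;		/* header first ...             */
		unsigned long masks[];		/* ... bitmap as flexible array */
	};

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static void listeners_free_rcu(struct rcu_head *head)
	{
		free(container_of(head, struct listeners, rcu));
	}

	int main(void)
	{
		size_t ngroups = 64, nlongs = ngroups / (8 * sizeof(unsigned long));
		struct listeners *l = calloc(1, sizeof(*l) +
					     nlongs * sizeof(unsigned long));

		if (!l)
			return 1;
		l->rcu.func = listeners_free_rcu;
		l->masks[0] |= 1UL << 3;	/* mark group 4 as listened to */
		printf("group 4 set: %d\n", !!(l->masks[0] & (1UL << 3)));
		l->rcu.func(&l->rcu);		/* what call_rcu() would do, eventually */
		return 0;
	}
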
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 26ed3e8587c2..1781d99145e2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -547,8 +547,20 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
547 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; 547 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
548 info.attrs = family->attrbuf; 548 info.attrs = family->attrbuf;
549 genl_info_net_set(&info, net); 549 genl_info_net_set(&info, net);
550 memset(&info.user_ptr, 0, sizeof(info.user_ptr));
550 551
551 return ops->doit(skb, &info); 552 if (family->pre_doit) {
553 err = family->pre_doit(ops, skb, &info);
554 if (err)
555 return err;
556 }
557
558 err = ops->doit(skb, &info);
559
560 if (family->post_doit)
561 family->post_doit(ops, skb, &info);
562
563 return err;
552} 564}
553 565
554static void genl_rcv(struct sk_buff *skb) 566static void genl_rcv(struct sk_buff *skb)
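
genl_rcv_msg() now brackets each doit() with optional per-family pre_doit/post_doit hooks, the natural place for shared locking or object lookup. The dispatch shape, reduced to a standalone sketch with hypothetical types:

	#include <stdio.h>

	struct info { void *user_ptr[2]; };

	struct family {
		int  (*pre_doit)(struct info *);
		int  (*doit)(struct info *);
		void (*post_doit)(struct info *);
	};

	/* Same shape as genl_rcv_msg(): the pre hook may veto, and the post
	 * hook always runs after a doit() that was reached. */
	static int dispatch(const struct family *f, struct info *info)
	{
		int err;

		if (f->pre_doit) {
			err = f->pre_doit(info);
			if (err)
				return err;	/* doit()/post_doit() never run */
		}
		err = f->doit(info);
		if (f->post_doit)
			f->post_doit(info);
		return err;
	}

	static int pre(struct info *i)   { puts("pre: grab lock");  return 0; }
	static int doit(struct info *i)  { puts("doit: handle op"); return 0; }
	static void post(struct info *i) { puts("post: drop lock"); }

	int main(void)
	{
		struct family f = { pre, doit, post };
		struct info info = { { 0 } };
		return dispatch(&f, &info);
	}
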
diff --git a/net/nonet.c b/net/nonet.c
index 92e76640c7cd..b1a73fda9c12 100644
--- a/net/nonet.c
+++ b/net/nonet.c
@@ -22,4 +22,5 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
22const struct file_operations bad_sock_fops = { 22const struct file_operations bad_sock_fops = {
23 .owner = THIS_MODULE, 23 .owner = THIS_MODULE,
24 .open = sock_no_open, 24 .open = sock_no_open,
25 .llseek = noop_llseek,
25}; 26};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9a17f28b1253..8298e676f5a0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -488,7 +488,7 @@ retry:
488 skb->dev = dev; 488 skb->dev = dev;
489 skb->priority = sk->sk_priority; 489 skb->priority = sk->sk_priority;
490 skb->mark = sk->sk_mark; 490 skb->mark = sk->sk_mark;
491 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 491 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
492 if (err < 0) 492 if (err < 0)
493 goto out_unlock; 493 goto out_unlock;
494 494
@@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock,
1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1210 if (err) 1210 if (err)
1211 goto out_free; 1211 goto out_free;
1212 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 1212 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1213 if (err < 0) 1213 if (err < 0)
1214 goto out_free; 1214 goto out_free;
1215 1215
@@ -1610,9 +1610,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1610 1610
1611 err = -EINVAL; 1611 err = -EINVAL;
1612 vnet_hdr_len = sizeof(vnet_hdr); 1612 vnet_hdr_len = sizeof(vnet_hdr);
1613 if ((len -= vnet_hdr_len) < 0) 1613 if (len < vnet_hdr_len)
1614 goto out_free; 1614 goto out_free;
1615 1615
1616 len -= vnet_hdr_len;
1617
1616 if (skb_is_gso(skb)) { 1618 if (skb_is_gso(skb)) {
1617 struct skb_shared_info *sinfo = skb_shinfo(skb); 1619 struct skb_shared_info *sinfo = skb_shinfo(skb);
1618 1620
@@ -1719,7 +1721,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1719 rcu_read_lock(); 1721 rcu_read_lock();
1720 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); 1722 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1721 if (dev) 1723 if (dev)
1722 strlcpy(uaddr->sa_data, dev->name, 15); 1724 strncpy(uaddr->sa_data, dev->name, 14);
1723 else 1725 else
1724 memset(uaddr->sa_data, 0, 14); 1726 memset(uaddr->sa_data, 0, 14);
1725 rcu_read_unlock(); 1727 rcu_read_unlock();
@@ -1742,6 +1744,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1742 sll->sll_family = AF_PACKET; 1744 sll->sll_family = AF_PACKET;
1743 sll->sll_ifindex = po->ifindex; 1745 sll->sll_ifindex = po->ifindex;
1744 sll->sll_protocol = po->num; 1746 sll->sll_protocol = po->num;
1747 sll->sll_pkttype = 0;
1745 rcu_read_lock(); 1748 rcu_read_lock();
1746 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); 1749 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
1747 if (dev) { 1750 if (dev) {
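
The packet_recvmsg() hunk above is a classic unsigned-underflow fix: len is a size_t there, so "(len -= vnet_hdr_len) < 0" can never be true and a short buffer silently wraps to a huge value. Both shapes side by side, as a sketch:

	#include <stddef.h>
	#include <stdio.h>

	/* Buggy shape: with size_t, (len -= hdr) < 0 is always false. */
	static int consume_header_buggy(size_t len, size_t hdr)
	{
		if ((len -= hdr) < 0)	/* never triggers for unsigned len */
			return -1;
		return 0;
	}

	/* Fixed shape, as in the patch: compare first, subtract after. */
	static int consume_header_fixed(size_t len, size_t hdr)
	{
		if (len < hdr)
			return -1;
		len -= hdr;
		return 0;
	}

	int main(void)
	{
		printf("buggy: %d\n", consume_header_buggy(4, 10));	/* 0: the bug */
		printf("fixed: %d\n", consume_header_fixed(4, 10));	/* -1 */
		return 0;
	}
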
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 6ec7d55b1769..0d9b8a220a78 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -14,3 +14,15 @@ config PHONET
14 14
15 To compile this driver as a module, choose M here: the module 15 To compile this driver as a module, choose M here: the module
16 will be called phonet. If unsure, say N. 16 will be called phonet. If unsure, say N.
17
18config PHONET_PIPECTRLR
19 bool "Phonet Pipe Controller (EXPERIMENTAL)"
20 depends on PHONET && EXPERIMENTAL
21	default n
22 help
23	  Enable the Pipe Controller implementation in the Phonet stack,
24	  which supports pipe data with Nokia slim modems such as the WG2.5
25	  used on the ST-Ericsson U8500 platform.
26
27 This option is incompatible with older Nokia modems.
28 Say N here unless you really know what you are doing.
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 73aee7f2fcdc..fd95beb72f5d 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -251,6 +251,16 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
251 else if (phonet_address_lookup(net, daddr) == 0) { 251 else if (phonet_address_lookup(net, daddr) == 0) {
252 dev = phonet_device_get(net); 252 dev = phonet_device_get(net);
253 skb->pkt_type = PACKET_LOOPBACK; 253 skb->pkt_type = PACKET_LOOPBACK;
254 } else if (pn_sockaddr_get_object(target) == 0) {
255 /* Resource routing (small race until phonet_rcv()) */
256 struct sock *sk = pn_find_sock_by_res(net,
257 target->spn_resource);
258 if (sk) {
259 sock_put(sk);
260 dev = phonet_device_get(net);
261 skb->pkt_type = PACKET_LOOPBACK;
262 } else
263 dev = phonet_route_output(net, daddr);
254 } else 264 } else
255 dev = phonet_route_output(net, daddr); 265 dev = phonet_route_output(net, daddr);
256 266
@@ -383,6 +393,13 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
383 goto out; 393 goto out;
384 } 394 }
385 395
396 /* resource routing */
397 if (pn_sockaddr_get_object(&sa) == 0) {
398 struct sock *sk = pn_find_sock_by_res(net, sa.spn_resource);
399 if (sk)
400 return sk_receive_skb(sk, skb, 0);
401 }
402
386 /* check if we are the destination */ 403 /* check if we are the destination */
387 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) { 404 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) {
388 /* Phonet packet input */ 405 /* Phonet packet input */
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 1bd38db4fe1e..2f032381bd45 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -52,6 +52,19 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg)
52 answ = skb ? skb->len : 0; 52 answ = skb ? skb->len : 0;
53 release_sock(sk); 53 release_sock(sk);
54 return put_user(answ, (int __user *)arg); 54 return put_user(answ, (int __user *)arg);
55
56 case SIOCPNADDRESOURCE:
57 case SIOCPNDELRESOURCE: {
58 u32 res;
59 if (get_user(res, (u32 __user *)arg))
60 return -EFAULT;
61 if (res >= 256)
62 return -EINVAL;
63 if (cmd == SIOCPNADDRESOURCE)
64 return pn_sock_bind_res(sk, res);
65 else
66 return pn_sock_unbind_res(sk, res);
67 }
55 } 68 }
56 69
57 return -ENOIOCTLCMD; 70 return -ENOIOCTLCMD;
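
The new SIOCPNADDRESOURCE/SIOCPNDELRESOURCE handling copies a u32 from userspace, rejects anything outside the 8-bit Phonet resource space, and only then dispatches to bind or unbind. The same validation order as a standalone sketch (bind_res/unbind_res are hypothetical stand-ins for pn_sock_bind_res()/pn_sock_unbind_res()):

	#include <stdint.h>
	#include <stdio.h>

	enum { ADD_RESOURCE, DEL_RESOURCE };

	static int bind_res(uint8_t res)   { printf("bind %u\n", res);   return 0; }
	static int unbind_res(uint8_t res) { printf("unbind %u\n", res); return 0; }

	/* Copy, range-check against the 8-bit resource space, then dispatch. */
	static int resource_ioctl(int cmd, uint32_t res)
	{
		if (res >= 256)
			return -1;	/* -EINVAL in the kernel */
		return cmd == ADD_RESOURCE ? bind_res(res) : unbind_res(res);
	}

	int main(void)
	{
		resource_ioctl(ADD_RESOURCE, 42);
		printf("out of range: %d\n", resource_ioctl(ADD_RESOURCE, 1000));
		return 0;
	}
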
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 15003021f4f0..3e60f2e4e6c2 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -109,6 +109,210 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb,
109} 109}
110 110
111#define PAD 0x00 111#define PAD 0x00
112
113#ifdef CONFIG_PHONET_PIPECTRLR
114static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len)
115{
116 int i, j;
117 u8 base_fc, final_fc;
118
119 for (i = 0; i < len; i++) {
120 base_fc = host_fc[i];
121 for (j = 0; j < len; j++) {
122 if (remote_fc[j] == base_fc) {
123 final_fc = base_fc;
124 goto done;
125 }
126 }
127 }
128 return -EINVAL;
129
130done:
131 return final_fc;
132
133}
134
135static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb,
136 u8 *pref_rx_fc, u8 *req_tx_fc)
137{
138 struct pnpipehdr *hdr;
139 u8 n_sb;
140
141 if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
142 return -EINVAL;
143
144 hdr = pnp_hdr(skb);
145 n_sb = hdr->data[4];
146
147 __skb_pull(skb, sizeof(*hdr) + 4);
148 while (n_sb > 0) {
149 u8 type, buf[3], len = sizeof(buf);
150 u8 *data = pep_get_sb(skb, &type, &len, buf);
151
152 if (data == NULL)
153 return -EINVAL;
154
155 switch (type) {
156 case PN_PIPE_SB_REQUIRED_FC_TX:
157 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
158 break;
159 req_tx_fc[0] = data[2];
160 req_tx_fc[1] = data[3];
161 req_tx_fc[2] = data[4];
162 break;
163
164 case PN_PIPE_SB_PREFERRED_FC_RX:
165 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
166 break;
167 pref_rx_fc[0] = data[2];
168 pref_rx_fc[1] = data[3];
169 pref_rx_fc[2] = data[4];
170 break;
171
172 }
173 n_sb--;
174 }
175 return 0;
176}
177
178static int pipe_handler_send_req(struct sock *sk, u8 utid,
179 u8 msg_id, gfp_t priority)
180{
181 int len;
182 struct pnpipehdr *ph;
183 struct sk_buff *skb;
184 struct pep_sock *pn = pep_sk(sk);
185
186 static const u8 data[4] = {
187 PAD, PAD, PAD, PAD,
188 };
189
190 switch (msg_id) {
191 case PNS_PEP_CONNECT_REQ:
192 len = sizeof(data);
193 break;
194
195 case PNS_PEP_DISCONNECT_REQ:
196 case PNS_PEP_ENABLE_REQ:
197 case PNS_PEP_DISABLE_REQ:
198 len = 0;
199 break;
200
201 default:
202 return -EINVAL;
203 }
204
205 skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
206 if (!skb)
207 return -ENOMEM;
208 skb_set_owner_w(skb, sk);
209
210 skb_reserve(skb, MAX_PNPIPE_HEADER);
211 if (len) {
212 __skb_put(skb, len);
213 skb_copy_to_linear_data(skb, data, len);
214 }
215 __skb_push(skb, sizeof(*ph));
216 skb_reset_transport_header(skb);
217 ph = pnp_hdr(skb);
218 ph->utid = utid;
219 ph->message_id = msg_id;
220 ph->pipe_handle = pn->pipe_handle;
221 ph->error_code = PN_PIPE_NO_ERROR;
222
223 return pn_skb_send(sk, skb, &pn->remote_pep);
224}
225
226static int pipe_handler_send_created_ind(struct sock *sk,
227 u8 utid, u8 msg_id)
228{
229 int err_code;
230 struct pnpipehdr *ph;
231 struct sk_buff *skb;
232
233 struct pep_sock *pn = pep_sk(sk);
234 static u8 data[4] = {
235 0x03, 0x04,
236 };
237 data[2] = pn->tx_fc;
238 data[3] = pn->rx_fc;
239
240 /*
241	 * the value below is actually the number of sub-blocks, not an error code.
242 * Pipe_created_ind message format does not have any
243 * error code field. However, the Phonet stack will always send
244 * an error code as part of pnpipehdr. So, use that err_code to
245 * specify the number of sub-blocks.
246 */
247 err_code = 0x01;
248
249 skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC);
250 if (!skb)
251 return -ENOMEM;
252 skb_set_owner_w(skb, sk);
253
254 skb_reserve(skb, MAX_PNPIPE_HEADER);
255 __skb_put(skb, sizeof(data));
256 skb_copy_to_linear_data(skb, data, sizeof(data));
257 __skb_push(skb, sizeof(*ph));
258 skb_reset_transport_header(skb);
259 ph = pnp_hdr(skb);
260 ph->utid = utid;
261 ph->message_id = msg_id;
262 ph->pipe_handle = pn->pipe_handle;
263 ph->error_code = err_code;
264
265 return pn_skb_send(sk, skb, &pn->remote_pep);
266}
267
268static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id)
269{
270 int err_code;
271 struct pnpipehdr *ph;
272 struct sk_buff *skb;
273 struct pep_sock *pn = pep_sk(sk);
274
275 /*
276	 * the value below is actually a filler.
277 * Pipe_enabled/disabled_ind message format does not have any
278 * error code field. However, the Phonet stack will always send
279 * an error code as part of pnpipehdr. So, use that err_code to
280 * specify the filler value.
281 */
282 err_code = 0x0;
283
284 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC);
285 if (!skb)
286 return -ENOMEM;
287 skb_set_owner_w(skb, sk);
288
289 skb_reserve(skb, MAX_PNPIPE_HEADER);
290 __skb_push(skb, sizeof(*ph));
291 skb_reset_transport_header(skb);
292 ph = pnp_hdr(skb);
293 ph->utid = utid;
294 ph->message_id = msg_id;
295 ph->pipe_handle = pn->pipe_handle;
296 ph->error_code = err_code;
297
298 return pn_skb_send(sk, skb, &pn->remote_pep);
299}
300
301static int pipe_handler_enable_pipe(struct sock *sk, int enable)
302{
303 int utid, req;
304
305 if (enable) {
306 utid = PNS_PIPE_ENABLE_UTID;
307 req = PNS_PEP_ENABLE_REQ;
308 } else {
309 utid = PNS_PIPE_DISABLE_UTID;
310 req = PNS_PEP_DISABLE_REQ;
311 }
312 return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC);
313}
314#endif
315
112static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) 316static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
113{ 317{
114 static const u8 data[20] = { 318 static const u8 data[20] = {
@@ -192,7 +396,11 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
192 ph->data[3] = PAD; 396 ph->data[3] = PAD;
193 ph->data[4] = status; 397 ph->data[4] = status;
194 398
399#ifdef CONFIG_PHONET_PIPECTRLR
400 return pn_skb_send(sk, skb, &pn->remote_pep);
401#else
195 return pn_skb_send(sk, skb, &pipe_srv); 402 return pn_skb_send(sk, skb, &pipe_srv);
403#endif
196} 404}
197 405
198/* Send our RX flow control information to the sender. 406/* Send our RX flow control information to the sender.
@@ -324,11 +532,35 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
324 sk->sk_state_change(sk); 532 sk->sk_state_change(sk);
325 break; 533 break;
326 534
535#ifdef CONFIG_PHONET_PIPECTRLR
536 case PNS_PEP_DISCONNECT_RESP:
537 pn->pipe_state = PIPE_IDLE;
538 sk->sk_state = TCP_CLOSE;
539 break;
540#endif
541
327 case PNS_PEP_ENABLE_REQ: 542 case PNS_PEP_ENABLE_REQ:
328 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ 543 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
329 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 544 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
330 break; 545 break;
331 546
547#ifdef CONFIG_PHONET_PIPECTRLR
548 case PNS_PEP_ENABLE_RESP:
549 pn->pipe_state = PIPE_ENABLED;
550 pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID,
551 PNS_PIPE_ENABLED_IND);
552
553 if (!pn_flow_safe(pn->tx_fc)) {
554 atomic_set(&pn->tx_credits, 1);
555 sk->sk_write_space(sk);
556 }
557 if (sk->sk_state == TCP_ESTABLISHED)
558 break; /* Nothing to do */
559 sk->sk_state = TCP_ESTABLISHED;
560 pipe_grant_credits(sk);
561 break;
562#endif
563
332 case PNS_PEP_RESET_REQ: 564 case PNS_PEP_RESET_REQ:
333 switch (hdr->state_after_reset) { 565 switch (hdr->state_after_reset) {
334 case PN_PIPE_DISABLE: 566 case PN_PIPE_DISABLE:
@@ -347,6 +579,17 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
347 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 579 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
348 break; 580 break;
349 581
582#ifdef CONFIG_PHONET_PIPECTRLR
583 case PNS_PEP_DISABLE_RESP:
584 pn->pipe_state = PIPE_DISABLED;
585 atomic_set(&pn->tx_credits, 0);
586 pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID,
587 PNS_PIPE_DISABLED_IND);
588 sk->sk_state = TCP_SYN_RECV;
589 pn->rx_credits = 0;
590 break;
591#endif
592
350 case PNS_PEP_CTRL_REQ: 593 case PNS_PEP_CTRL_REQ:
351 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { 594 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
352 atomic_inc(&sk->sk_drops); 595 atomic_inc(&sk->sk_drops);
@@ -438,6 +681,42 @@ static void pipe_destruct(struct sock *sk)
438 skb_queue_purge(&pn->ctrlreq_queue); 681 skb_queue_purge(&pn->ctrlreq_queue);
439} 682}
440 683
684#ifdef CONFIG_PHONET_PIPECTRLR
685static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
686{
687 struct pep_sock *pn = pep_sk(sk);
688 u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1};
689 u8 remote_pref_rx_fc[3], remote_req_tx_fc[3];
690 u8 negotiated_rx_fc, negotiated_tx_fc;
691 int ret;
692
693 pipe_get_flow_info(sk, skb, remote_pref_rx_fc,
694 remote_req_tx_fc);
695 negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc,
696 host_pref_rx_fc,
697 sizeof(host_pref_rx_fc));
698 negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc,
699 remote_pref_rx_fc,
700 sizeof(host_pref_rx_fc));
701
702 pn->pipe_state = PIPE_DISABLED;
703 sk->sk_state = TCP_SYN_RECV;
704 sk->sk_backlog_rcv = pipe_do_rcv;
705 sk->sk_destruct = pipe_destruct;
706 pn->rx_credits = 0;
707 pn->rx_fc = negotiated_rx_fc;
708 pn->tx_fc = negotiated_tx_fc;
709 sk->sk_state_change(sk);
710
711 ret = pipe_handler_send_created_ind(sk,
712 PNS_PIPE_CREATED_IND_UTID,
713 PNS_PIPE_CREATED_IND
714 );
715
716 return ret;
717}
718#endif
719
441static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) 720static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
442{ 721{
443 struct sock *newsk; 722 struct sock *newsk;
@@ -601,6 +880,12 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
601 err = pep_connreq_rcv(sk, skb); 880 err = pep_connreq_rcv(sk, skb);
602 break; 881 break;
603 882
883#ifdef CONFIG_PHONET_PIPECTRLR
884 case PNS_PEP_CONNECT_RESP:
885 err = pep_connresp_rcv(sk, skb);
886 break;
887#endif
888
604 case PNS_PEP_DISCONNECT_REQ: 889 case PNS_PEP_DISCONNECT_REQ:
605 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 890 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
606 break; 891 break;
@@ -621,6 +906,28 @@ drop:
621 return err; 906 return err;
622} 907}
623 908
909static int pipe_do_remove(struct sock *sk)
910{
911 struct pep_sock *pn = pep_sk(sk);
912 struct pnpipehdr *ph;
913 struct sk_buff *skb;
914
915 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL);
916 if (!skb)
917 return -ENOMEM;
918
919 skb_reserve(skb, MAX_PNPIPE_HEADER);
920 __skb_push(skb, sizeof(*ph));
921 skb_reset_transport_header(skb);
922 ph = pnp_hdr(skb);
923 ph->utid = 0;
924 ph->message_id = PNS_PIPE_REMOVE_REQ;
925 ph->pipe_handle = pn->pipe_handle;
926 ph->data[0] = PAD;
927
928 return pn_skb_send(sk, skb, &pipe_srv);
929}
930
624/* associated socket ceases to exist */ 931/* associated socket ceases to exist */
625static void pep_sock_close(struct sock *sk, long timeout) 932static void pep_sock_close(struct sock *sk, long timeout)
626{ 933{
@@ -639,7 +946,22 @@ static void pep_sock_close(struct sock *sk, long timeout)
639 sk_for_each_safe(sknode, p, n, &pn->ackq) 946 sk_for_each_safe(sknode, p, n, &pn->ackq)
640 sk_del_node_init(sknode); 947 sk_del_node_init(sknode);
641 sk->sk_state = TCP_CLOSE; 948 sk->sk_state = TCP_CLOSE;
949 } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
950 /* Forcefully remove dangling Phonet pipe */
951 pipe_do_remove(sk);
952
953#ifdef CONFIG_PHONET_PIPECTRLR
954 if (pn->pipe_state != PIPE_IDLE) {
955 /* send pep disconnect request */
956 pipe_handler_send_req(sk,
957 PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ,
958 GFP_KERNEL);
959
960 pn->pipe_state = PIPE_IDLE;
961 sk->sk_state = TCP_CLOSE;
642 } 962 }
963#endif
964
643 ifindex = pn->ifindex; 965 ifindex = pn->ifindex;
644 pn->ifindex = 0; 966 pn->ifindex = 0;
645 release_sock(sk); 967 release_sock(sk);
@@ -716,6 +1038,20 @@ out:
716 return newsk; 1038 return newsk;
717} 1039}
718 1040
1041#ifdef CONFIG_PHONET_PIPECTRLR
1042static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
1043{
1044 struct pep_sock *pn = pep_sk(sk);
1045 struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
1046
1047 memcpy(&pn->remote_pep, spn, sizeof(struct sockaddr_pn));
1048
1049 return pipe_handler_send_req(sk,
1050 PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ,
1051 GFP_ATOMIC);
1052}
1053#endif
1054
719static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) 1055static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
720{ 1056{
721 struct pep_sock *pn = pep_sk(sk); 1057 struct pep_sock *pn = pep_sk(sk);
@@ -767,6 +1103,18 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
767 1103
768 lock_sock(sk); 1104 lock_sock(sk);
769 switch (optname) { 1105 switch (optname) {
1106#ifdef CONFIG_PHONET_PIPECTRLR
1107 case PNPIPE_PIPE_HANDLE:
1108 if (val) {
1109 if (pn->pipe_state > PIPE_IDLE) {
1110 err = -EFAULT;
1111 break;
1112 }
1113 pn->pipe_handle = val;
1114 break;
1115 }
1116#endif
1117
770 case PNPIPE_ENCAP: 1118 case PNPIPE_ENCAP:
771 if (val && val != PNPIPE_ENCAP_IP) { 1119 if (val && val != PNPIPE_ENCAP_IP) {
772 err = -EINVAL; 1120 err = -EINVAL;
@@ -792,6 +1140,17 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
792 err = 0; 1140 err = 0;
793 } 1141 }
794 goto out_norel; 1142 goto out_norel;
1143
1144#ifdef CONFIG_PHONET_PIPECTRLR
1145 case PNPIPE_ENABLE:
1146 if (pn->pipe_state <= PIPE_IDLE) {
1147 err = -ENOTCONN;
1148 break;
1149 }
1150 err = pipe_handler_enable_pipe(sk, val);
1151 break;
1152#endif
1153
795 default: 1154 default:
796 err = -ENOPROTOOPT; 1155 err = -ENOPROTOOPT;
797 } 1156 }
@@ -816,9 +1175,19 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
816 case PNPIPE_ENCAP: 1175 case PNPIPE_ENCAP:
817 val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; 1176 val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE;
818 break; 1177 break;
1178
819 case PNPIPE_IFINDEX: 1179 case PNPIPE_IFINDEX:
820 val = pn->ifindex; 1180 val = pn->ifindex;
821 break; 1181 break;
1182
1183#ifdef CONFIG_PHONET_PIPECTRLR
1184 case PNPIPE_ENABLE:
1185 if (pn->pipe_state <= PIPE_IDLE)
1186 return -ENOTCONN;
1187 val = pn->pipe_state != PIPE_DISABLED;
1188 break;
1189#endif
1190
822 default: 1191 default:
823 return -ENOPROTOOPT; 1192 return -ENOPROTOOPT;
824 } 1193 }
@@ -835,6 +1204,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
835{ 1204{
836 struct pep_sock *pn = pep_sk(sk); 1205 struct pep_sock *pn = pep_sk(sk);
837 struct pnpipehdr *ph; 1206 struct pnpipehdr *ph;
1207 int err;
838 1208
839 if (pn_flow_safe(pn->tx_fc) && 1209 if (pn_flow_safe(pn->tx_fc) &&
840 !atomic_add_unless(&pn->tx_credits, -1, 0)) { 1210 !atomic_add_unless(&pn->tx_credits, -1, 0)) {
@@ -852,8 +1222,16 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
852 } else 1222 } else
853 ph->message_id = PNS_PIPE_DATA; 1223 ph->message_id = PNS_PIPE_DATA;
854 ph->pipe_handle = pn->pipe_handle; 1224 ph->pipe_handle = pn->pipe_handle;
1225#ifdef CONFIG_PHONET_PIPECTRLR
1226 err = pn_skb_send(sk, skb, &pn->remote_pep);
1227#else
1228 err = pn_skb_send(sk, skb, &pipe_srv);
1229#endif
1230
1231 if (err && pn_flow_safe(pn->tx_fc))
1232 atomic_inc(&pn->tx_credits);
1233 return err;
855 1234
856 return pn_skb_send(sk, skb, &pipe_srv);
857} 1235}
858 1236
859static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, 1237static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -873,7 +1251,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
873 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, 1251 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len,
874 flags & MSG_DONTWAIT, &err); 1252 flags & MSG_DONTWAIT, &err);
875 if (!skb) 1253 if (!skb)
876 return -ENOBUFS; 1254 return err;
877 1255
878 skb_reserve(skb, MAX_PHONET_HEADER + 3); 1256 skb_reserve(skb, MAX_PHONET_HEADER + 3);
879 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 1257 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
@@ -1045,6 +1423,8 @@ static void pep_sock_unhash(struct sock *sk)
1045 struct sock *skparent = NULL; 1423 struct sock *skparent = NULL;
1046 1424
1047 lock_sock(sk); 1425 lock_sock(sk);
1426
1427#ifndef CONFIG_PHONET_PIPECTRLR
1048 if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { 1428 if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
1049 skparent = pn->listener; 1429 skparent = pn->listener;
1050 release_sock(sk); 1430 release_sock(sk);
@@ -1054,6 +1434,7 @@ static void pep_sock_unhash(struct sock *sk)
1054 sk_del_node_init(sk); 1434 sk_del_node_init(sk);
1055 sk = skparent; 1435 sk = skparent;
1056 } 1436 }
1437#endif
1057 /* Unhash a listening sock only when it is closed 1438 /* Unhash a listening sock only when it is closed
1058 * and all of its active connected pipes are closed. */ 1439 * and all of its active connected pipes are closed. */
1059 if (hlist_empty(&pn->hlist)) 1440 if (hlist_empty(&pn->hlist))
@@ -1067,6 +1448,9 @@ static void pep_sock_unhash(struct sock *sk)
1067static struct proto pep_proto = { 1448static struct proto pep_proto = {
1068 .close = pep_sock_close, 1449 .close = pep_sock_close,
1069 .accept = pep_sock_accept, 1450 .accept = pep_sock_accept,
1451#ifdef CONFIG_PHONET_PIPECTRLR
1452 .connect = pep_sock_connect,
1453#endif
1070 .ioctl = pep_ioctl, 1454 .ioctl = pep_ioctl,
1071 .init = pep_init, 1455 .init = pep_init,
1072 .setsockopt = pep_setsockopt, 1456 .setsockopt = pep_setsockopt,
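
pipe_negotiate_fc() above walks the host's flow-control preferences in order and returns the first value the remote side also offers. Note that the in-tree version returns u8, so its "return -EINVAL" on mismatch is silently truncated; a standalone reworking with an int return that can actually signal failure (editor's sketch, not the patch's code):

	#include <stdio.h>

	/* First host-preferred flow-control mode also offered by the remote,
	 * or -1 if the sets are disjoint. Returning int avoids the u8
	 * truncation of -EINVAL in the original. */
	static int negotiate_fc(const unsigned char *host_fc,
				const unsigned char *remote_fc, int len)
	{
		int i, j;

		for (i = 0; i < len; i++)
			for (j = 0; j < len; j++)
				if (remote_fc[j] == host_fc[i])
					return host_fc[i];
		return -1;
	}

	int main(void)
	{
		unsigned char host[3] = {3, 2, 1}, remote[3] = {1, 2, 0};

		printf("negotiated: %d\n", negotiate_fc(host, remote, 3)); /* 2 */
		return 0;
	}
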
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index b18e48fae975..947038ddd04c 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -292,8 +292,7 @@ static void phonet_route_autodel(struct net_device *dev)
292 if (bitmap_empty(deleted, 64)) 292 if (bitmap_empty(deleted, 64))
293 return; /* short-circuit RCU */ 293 return; /* short-circuit RCU */
294 synchronize_rcu(); 294 synchronize_rcu();
295 for (i = find_first_bit(deleted, 64); i < 64; 295 for_each_set_bit(i, deleted, 64) {
296 i = find_next_bit(deleted, 64, i + 1)) {
297 rtm_phonet_notify(RTM_DELROUTE, dev, i); 296 rtm_phonet_notify(RTM_DELROUTE, dev, i);
298 dev_put(dev); 297 dev_put(dev);
299 } 298 }
@@ -374,6 +373,7 @@ int __init phonet_device_init(void)
374 if (err) 373 if (err)
375 return err; 374 return err;
376 375
376 proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops);
377 register_netdevice_notifier(&phonet_device_notifier); 377 register_netdevice_notifier(&phonet_device_notifier);
378 err = phonet_netlink_register(); 378 err = phonet_netlink_register();
379 if (err) 379 if (err)
@@ -386,6 +386,7 @@ void phonet_device_exit(void)
386 rtnl_unregister_all(PF_PHONET); 386 rtnl_unregister_all(PF_PHONET);
387 unregister_netdevice_notifier(&phonet_device_notifier); 387 unregister_netdevice_notifier(&phonet_device_notifier);
388 unregister_pernet_device(&phonet_net_ops); 388 unregister_pernet_device(&phonet_net_ops);
389 proc_net_remove(&init_net, "pnresource");
389} 390}
390 391
391int phonet_route_add(struct net_device *dev, u8 daddr) 392int phonet_route_add(struct net_device *dev, u8 daddr)
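
The pn_dev.c hunk swaps an open-coded find_first_bit/find_next_bit loop for the for_each_set_bit() helper. The equivalent iteration over a single word in plain C, using a GCC-style builtin to locate each set bit (sketch):

	#include <stdio.h>

	/* Visit each set bit of a single word, lowest first -- the effect of
	 * for_each_set_bit() on a 64-bit bitmap that fits in one long. */
	static void for_each_set_bit_in(unsigned long word,
					void (*fn)(unsigned int bit))
	{
		while (word) {
			unsigned int bit = __builtin_ctzl(word); /* lowest set bit */
			fn(bit);
			word &= word - 1;	/* clear it and continue */
		}
	}

	static void report(unsigned int bit)
	{
		printf("deleted route %u\n", bit);
	}

	int main(void)
	{
		for_each_set_bit_in(0x29UL, report);	/* bits 0, 3, 5 */
		return 0;
	}
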
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 6e9848bf0370..25f746d20c1f 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -158,6 +158,7 @@ void pn_sock_unhash(struct sock *sk)
158 spin_lock_bh(&pnsocks.lock); 158 spin_lock_bh(&pnsocks.lock);
159 sk_del_node_init(sk); 159 sk_del_node_init(sk);
160 spin_unlock_bh(&pnsocks.lock); 160 spin_unlock_bh(&pnsocks.lock);
161 pn_sock_unbind_all_res(sk);
161} 162}
162EXPORT_SYMBOL(pn_sock_unhash); 163EXPORT_SYMBOL(pn_sock_unhash);
163 164
@@ -224,6 +225,101 @@ static int pn_socket_autobind(struct socket *sock)
224 return 0; /* socket was already bound */ 225 return 0; /* socket was already bound */
225} 226}
226 227
228#ifdef CONFIG_PHONET_PIPECTRLR
229static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
230 int len, int flags)
231{
232 struct sock *sk = sock->sk;
233 struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
234 long timeo;
235 int err;
236
237 if (len < sizeof(struct sockaddr_pn))
238 return -EINVAL;
239 if (spn->spn_family != AF_PHONET)
240 return -EAFNOSUPPORT;
241
242 lock_sock(sk);
243
244 switch (sock->state) {
245 case SS_UNCONNECTED:
246 sk->sk_state = TCP_CLOSE;
247 break;
248 case SS_CONNECTING:
249 switch (sk->sk_state) {
250 case TCP_SYN_RECV:
251 sock->state = SS_CONNECTED;
252 err = -EISCONN;
253 goto out;
254 case TCP_CLOSE:
255 err = -EALREADY;
256 if (flags & O_NONBLOCK)
257 goto out;
258 goto wait_connect;
259 }
260 break;
261 case SS_CONNECTED:
262 switch (sk->sk_state) {
263 case TCP_SYN_RECV:
264 err = -EISCONN;
265 goto out;
266 case TCP_CLOSE:
267 sock->state = SS_UNCONNECTED;
268 break;
269 }
270 break;
271 case SS_DISCONNECTING:
272 case SS_FREE:
273 break;
274 }
275 sk->sk_state = TCP_CLOSE;
276 sk_stream_kill_queues(sk);
277
278 sock->state = SS_CONNECTING;
279 err = sk->sk_prot->connect(sk, addr, len);
280 if (err < 0) {
281 sock->state = SS_UNCONNECTED;
282 sk->sk_state = TCP_CLOSE;
283 goto out;
284 }
285
286 err = -EINPROGRESS;
287wait_connect:
288 if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK))
289 goto out;
290
291 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
292 release_sock(sk);
293
294 err = -ERESTARTSYS;
295 timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
296 sk->sk_state != TCP_CLOSE,
297 timeo);
298
299 lock_sock(sk);
300 if (timeo < 0)
301 goto out; /* -ERESTARTSYS */
302
303 err = -ETIMEDOUT;
304 if (timeo == 0 && sk->sk_state != TCP_SYN_RECV)
305 goto out;
306
307 if (sk->sk_state != TCP_SYN_RECV) {
308 sock->state = SS_UNCONNECTED;
309 err = sock_error(sk);
310 if (!err)
311 err = -ECONNREFUSED;
312 goto out;
313 }
314 sock->state = SS_CONNECTED;
315 err = 0;
316
317out:
318 release_sock(sk);
319 return err;
320}
321#endif
322
227static int pn_socket_accept(struct socket *sock, struct socket *newsock, 323static int pn_socket_accept(struct socket *sock, struct socket *newsock,
228 int flags) 324 int flags)
229{ 325{
@@ -281,7 +377,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
281 if (!mask && sk->sk_state == TCP_CLOSE_WAIT) 377 if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
282 return POLLHUP; 378 return POLLHUP;
283 379
284 if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits)) 380 if (sk->sk_state == TCP_ESTABLISHED &&
381 atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
382 atomic_read(&pn->tx_credits))
285 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 383 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
286 384
287 return mask; 385 return mask;
@@ -390,7 +488,11 @@ const struct proto_ops phonet_stream_ops = {
390 .owner = THIS_MODULE, 488 .owner = THIS_MODULE,
391 .release = pn_socket_release, 489 .release = pn_socket_release,
392 .bind = pn_socket_bind, 490 .bind = pn_socket_bind,
491#ifdef CONFIG_PHONET_PIPECTRLR
492 .connect = pn_socket_connect,
493#else
393 .connect = sock_no_connect, 494 .connect = sock_no_connect,
495#endif
394 .socketpair = sock_no_socketpair, 496 .socketpair = sock_no_socketpair,
395 .accept = pn_socket_accept, 497 .accept = pn_socket_accept,
396 .getname = pn_socket_getname, 498 .getname = pn_socket_getname,
@@ -563,3 +665,188 @@ const struct file_operations pn_sock_seq_fops = {
563 .release = seq_release_net, 665 .release = seq_release_net,
564}; 666};
565#endif 667#endif
668
669static struct {
670 struct sock *sk[256];
671} pnres;
672
673/*
674 * Find and hold socket based on resource.
675 */
676struct sock *pn_find_sock_by_res(struct net *net, u8 res)
677{
678 struct sock *sk;
679
680 if (!net_eq(net, &init_net))
681 return NULL;
682
683 rcu_read_lock();
684 sk = rcu_dereference(pnres.sk[res]);
685 if (sk)
686 sock_hold(sk);
687 rcu_read_unlock();
688 return sk;
689}
690
691static DEFINE_MUTEX(resource_mutex);
692
693int pn_sock_bind_res(struct sock *sk, u8 res)
694{
695 int ret = -EADDRINUSE;
696
697 if (!net_eq(sock_net(sk), &init_net))
698 return -ENOIOCTLCMD;
699 if (!capable(CAP_SYS_ADMIN))
700 return -EPERM;
701 if (pn_socket_autobind(sk->sk_socket))
702 return -EAGAIN;
703
704 mutex_lock(&resource_mutex);
705 if (pnres.sk[res] == NULL) {
706 sock_hold(sk);
707 rcu_assign_pointer(pnres.sk[res], sk);
708 ret = 0;
709 }
710 mutex_unlock(&resource_mutex);
711 return ret;
712}
713
714int pn_sock_unbind_res(struct sock *sk, u8 res)
715{
716 int ret = -ENOENT;
717
718 if (!capable(CAP_SYS_ADMIN))
719 return -EPERM;
720
721 mutex_lock(&resource_mutex);
722 if (pnres.sk[res] == sk) {
723 rcu_assign_pointer(pnres.sk[res], NULL);
724 ret = 0;
725 }
726 mutex_unlock(&resource_mutex);
727
728 if (ret == 0) {
729 synchronize_rcu();
730 sock_put(sk);
731 }
732 return ret;
733}
734
735void pn_sock_unbind_all_res(struct sock *sk)
736{
737 unsigned res, match = 0;
738
739 mutex_lock(&resource_mutex);
740 for (res = 0; res < 256; res++) {
741 if (pnres.sk[res] == sk) {
742 rcu_assign_pointer(pnres.sk[res], NULL);
743 match++;
744 }
745 }
746 mutex_unlock(&resource_mutex);
747
748 if (match == 0)
749 return;
750 synchronize_rcu();
751 while (match > 0) {
752 sock_put(sk);
753 match--;
754 }
755}
756
757#ifdef CONFIG_PROC_FS
758static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
759{
760 struct net *net = seq_file_net(seq);
761 unsigned i;
762
763 if (!net_eq(net, &init_net))
764 return NULL;
765
766 for (i = 0; i < 256; i++) {
767 if (pnres.sk[i] == NULL)
768 continue;
769 if (!pos)
770 return pnres.sk + i;
771 pos--;
772 }
773 return NULL;
774}
775
776static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk)
777{
778 struct net *net = seq_file_net(seq);
779 unsigned i;
780
781 BUG_ON(!net_eq(net, &init_net));
782
783 for (i = (sk - pnres.sk) + 1; i < 256; i++)
784 if (pnres.sk[i])
785 return pnres.sk + i;
786 return NULL;
787}
788
789static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos)
790 __acquires(resource_mutex)
791{
792 mutex_lock(&resource_mutex);
793 return *pos ? pn_res_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
794}
795
796static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos)
797{
798 struct sock **sk;
799
800 if (v == SEQ_START_TOKEN)
801 sk = pn_res_get_idx(seq, 0);
802 else
803 sk = pn_res_get_next(seq, v);
804 (*pos)++;
805 return sk;
806}
807
808static void pn_res_seq_stop(struct seq_file *seq, void *v)
809 __releases(resource_mutex)
810{
811 mutex_unlock(&resource_mutex);
812}
813
814static int pn_res_seq_show(struct seq_file *seq, void *v)
815{
816 int len;
817
818 if (v == SEQ_START_TOKEN)
819 seq_printf(seq, "%s%n", "rs uid inode", &len);
820 else {
821 struct sock **psk = v;
822 struct sock *sk = *psk;
823
824 seq_printf(seq, "%02X %5d %lu%n",
825 (int) (psk - pnres.sk), sock_i_uid(sk),
826 sock_i_ino(sk), &len);
827 }
828 seq_printf(seq, "%*s\n", 63 - len, "");
829 return 0;
830}
831
832static const struct seq_operations pn_res_seq_ops = {
833 .start = pn_res_seq_start,
834 .next = pn_res_seq_next,
835 .stop = pn_res_seq_stop,
836 .show = pn_res_seq_show,
837};
838
839static int pn_res_open(struct inode *inode, struct file *file)
840{
841 return seq_open_net(inode, file, &pn_res_seq_ops,
842 sizeof(struct seq_net_private));
843}
844
845const struct file_operations pn_res_seq_fops = {
846 .owner = THIS_MODULE,
847 .open = pn_res_open,
848 .read = seq_read,
849 .llseek = seq_lseek,
850 .release = seq_release_net,
851};
852#endif
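
pn_res_seq_show() above uses the "%n" conversion to learn how many characters the record consumed, then pads with "%*s" so every line comes out exactly 64 bytes, keeping the proc file cheaply seekable. The same width arithmetic in userspace printf (illustration only; modern kernels no longer honor "%n" in their printf implementations):

	#include <stdio.h>

	/* Emit a record padded to a fixed 64-byte line (63 chars + '\n'),
	 * the same width math as pn_res_seq_show(). */
	static void emit_fixed_width(unsigned int res, int uid, long ino)
	{
		int len;

		printf("%02X %5d %lu%n", res, uid, ino, &len);
		printf("%*s\n", 63 - len, "");
	}

	int main(void)
	{
		emit_fixed_width(0x2a, 1000, 12345);
		return 0;
	}
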
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index aebfecbdb841..bb6ad81b671d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -39,7 +39,15 @@
39#include <net/sock.h> 39#include <net/sock.h>
40 40
41#include "rds.h" 41#include "rds.h"
42#include "rdma.h" 42
43char *rds_str_array(char **array, size_t elements, size_t index)
44{
45 if ((index < elements) && array[index])
46 return array[index];
47 else
48 return "unknown";
49}
50EXPORT_SYMBOL(rds_str_array);
43 51
44/* this is just used for stats gathering :/ */ 52/* this is just used for stats gathering :/ */
45static DEFINE_SPINLOCK(rds_sock_lock); 53static DEFINE_SPINLOCK(rds_sock_lock);
@@ -62,7 +70,7 @@ static int rds_release(struct socket *sock)
62 struct rds_sock *rs; 70 struct rds_sock *rs;
63 unsigned long flags; 71 unsigned long flags;
64 72
65 if (sk == NULL) 73 if (!sk)
66 goto out; 74 goto out;
67 75
68 rs = rds_sk_to_rs(sk); 76 rs = rds_sk_to_rs(sk);
@@ -73,7 +81,15 @@ static int rds_release(struct socket *sock)
73 * with the socket. */ 81 * with the socket. */
74 rds_clear_recv_queue(rs); 82 rds_clear_recv_queue(rs);
75 rds_cong_remove_socket(rs); 83 rds_cong_remove_socket(rs);
84
85 /*
 86 * the binding lookup hash uses RCU, so we need to
 87 * make sure we synchronize_rcu before we free our
88 * entry
89 */
76 rds_remove_bound(rs); 90 rds_remove_bound(rs);
91 synchronize_rcu();
92
77 rds_send_drop_to(rs, NULL); 93 rds_send_drop_to(rs, NULL);
78 rds_rdma_drop_keys(rs); 94 rds_rdma_drop_keys(rs);
79 rds_notify_queue_get(rs, NULL); 95 rds_notify_queue_get(rs, NULL);
@@ -83,6 +99,8 @@ static int rds_release(struct socket *sock)
83 rds_sock_count--; 99 rds_sock_count--;
84 spin_unlock_irqrestore(&rds_sock_lock, flags); 100 spin_unlock_irqrestore(&rds_sock_lock, flags);
85 101
102 rds_trans_put(rs->rs_transport);
103
86 sock->sk = NULL; 104 sock->sk = NULL;
87 sock_put(sk); 105 sock_put(sk);
88out: 106out:
@@ -514,7 +532,7 @@ out:
514 spin_unlock_irqrestore(&rds_sock_lock, flags); 532 spin_unlock_irqrestore(&rds_sock_lock, flags);
515} 533}
516 534
517static void __exit rds_exit(void) 535static void rds_exit(void)
518{ 536{
519 sock_unregister(rds_family_ops.family); 537 sock_unregister(rds_family_ops.family);
520 proto_unregister(&rds_proto); 538 proto_unregister(&rds_proto);
@@ -529,7 +547,7 @@ static void __exit rds_exit(void)
529} 547}
530module_exit(rds_exit); 548module_exit(rds_exit);
531 549
532static int __init rds_init(void) 550static int rds_init(void)
533{ 551{
534 int ret; 552 int ret;
535 553
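
The new rds_str_array() helper is a bounds-checked table lookup that later patches in the series use to stringify IB events and work-completion statuses. A hedged usage sketch (the state table is invented for illustration; only rds_str_array() itself comes from the patch):

	#include <linux/kernel.h>
	#include "rds.h"

	static char *conn_state_strings[] = {
		[0] = "down",
		[2] = "up",	/* index 1 deliberately left NULL */
	};

	static char *conn_state_str(size_t state)
	{
		/* Out-of-range indexes and NULL holes both map to "unknown". */
		return rds_str_array(conn_state_strings,
				     ARRAY_SIZE(conn_state_strings), state);
	}
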
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 5d95fc007f1a..2f6b3fcc79f8 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -34,45 +34,52 @@
34#include <net/sock.h> 34#include <net/sock.h>
35#include <linux/in.h> 35#include <linux/in.h>
36#include <linux/if_arp.h> 36#include <linux/if_arp.h>
37#include <linux/jhash.h>
37#include "rds.h" 38#include "rds.h"
38 39
39/* 40#define BIND_HASH_SIZE 1024
40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't 41static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
41 * particularly zippy.
42 *
43 * This is now called for every incoming frame so we arguably care much more
44 * about it than we used to.
45 */
46static DEFINE_SPINLOCK(rds_bind_lock); 42static DEFINE_SPINLOCK(rds_bind_lock);
47static struct rb_root rds_bind_tree = RB_ROOT;
48 43
49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port, 44static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
50 struct rds_sock *insert) 45{
46 return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
47 (BIND_HASH_SIZE - 1));
48}
49
50static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
51 struct rds_sock *insert)
51{ 52{
52 struct rb_node **p = &rds_bind_tree.rb_node;
53 struct rb_node *parent = NULL;
54 struct rds_sock *rs; 53 struct rds_sock *rs;
54 struct hlist_node *node;
55 struct hlist_head *head = hash_to_bucket(addr, port);
55 u64 cmp; 56 u64 cmp;
56 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); 57 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
57 58
58 while (*p) { 59 rcu_read_lock();
59 parent = *p; 60 hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
60 rs = rb_entry(parent, struct rds_sock, rs_bound_node);
61
62 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | 61 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
63 be16_to_cpu(rs->rs_bound_port); 62 be16_to_cpu(rs->rs_bound_port);
64 63
65 if (needle < cmp) 64 if (cmp == needle) {
66 p = &(*p)->rb_left; 65 rcu_read_unlock();
67 else if (needle > cmp)
68 p = &(*p)->rb_right;
69 else
70 return rs; 66 return rs;
67 }
71 } 68 }
69 rcu_read_unlock();
72 70
73 if (insert) { 71 if (insert) {
74 rb_link_node(&insert->rs_bound_node, parent, p); 72 /*
75 rb_insert_color(&insert->rs_bound_node, &rds_bind_tree); 73 * make sure our addr and port are set before
 74 * we are added to the list; other RCU
 75 * readers will find us as soon as the
 76 * hlist_add_head_rcu() is done
77 */
78 insert->rs_bound_addr = addr;
79 insert->rs_bound_port = port;
80 rds_sock_addref(insert);
81
82 hlist_add_head_rcu(&insert->rs_bound_node, head);
76 } 83 }
77 return NULL; 84 return NULL;
78} 85}
@@ -86,15 +93,13 @@ static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
86struct rds_sock *rds_find_bound(__be32 addr, __be16 port) 93struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
87{ 94{
88 struct rds_sock *rs; 95 struct rds_sock *rs;
89 unsigned long flags;
90 96
91 spin_lock_irqsave(&rds_bind_lock, flags); 97 rs = rds_bind_lookup(addr, port, NULL);
92 rs = rds_bind_tree_walk(addr, port, NULL); 98
93 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) 99 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
94 rds_sock_addref(rs); 100 rds_sock_addref(rs);
95 else 101 else
96 rs = NULL; 102 rs = NULL;
97 spin_unlock_irqrestore(&rds_bind_lock, flags);
98 103
99 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, 104 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
100 ntohs(port)); 105 ntohs(port));
@@ -121,22 +126,15 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
121 do { 126 do {
122 if (rover == 0) 127 if (rover == 0)
123 rover++; 128 rover++;
124 if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) { 129 if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
125 *port = cpu_to_be16(rover); 130 *port = rs->rs_bound_port;
126 ret = 0; 131 ret = 0;
132 rdsdebug("rs %p binding to %pI4:%d\n",
133 rs, &addr, (int)ntohs(*port));
127 break; 134 break;
128 } 135 }
129 } while (rover++ != last); 136 } while (rover++ != last);
130 137
131 if (ret == 0) {
132 rs->rs_bound_addr = addr;
133 rs->rs_bound_port = *port;
134 rds_sock_addref(rs);
135
136 rdsdebug("rs %p binding to %pI4:%d\n",
137 rs, &addr, (int)ntohs(*port));
138 }
139
140 spin_unlock_irqrestore(&rds_bind_lock, flags); 138 spin_unlock_irqrestore(&rds_bind_lock, flags);
141 139
142 return ret; 140 return ret;
@@ -153,7 +151,7 @@ void rds_remove_bound(struct rds_sock *rs)
153 rs, &rs->rs_bound_addr, 151 rs, &rs->rs_bound_addr,
154 ntohs(rs->rs_bound_port)); 152 ntohs(rs->rs_bound_port));
155 153
156 rb_erase(&rs->rs_bound_node, &rds_bind_tree); 154 hlist_del_init_rcu(&rs->rs_bound_node);
157 rds_sock_put(rs); 155 rds_sock_put(rs);
158 rs->rs_bound_addr = 0; 156 rs->rs_bound_addr = 0;
159 } 157 }
@@ -184,7 +182,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
184 goto out; 182 goto out;
185 183
186 trans = rds_trans_get_preferred(sin->sin_addr.s_addr); 184 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
187 if (trans == NULL) { 185 if (!trans) {
188 ret = -EADDRNOTAVAIL; 186 ret = -EADDRNOTAVAIL;
189 rds_remove_bound(rs); 187 rds_remove_bound(rs);
190 if (printk_ratelimit()) 188 if (printk_ratelimit())
@@ -198,5 +196,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
198 196
199out: 197out:
200 release_sock(sk); 198 release_sock(sk);
199
200 /* we might have called rds_remove_bound on error */
201 if (ret)
202 synchronize_rcu();
201 return ret; 203 return ret;
202} 204}
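
bind.c replaces the rb-tree with a fixed 1024-bucket hash keyed by jhash_2words() over the address/port pair, walked under rcu_read_lock(). A standalone sketch of that lookup shape, using the four-argument hlist RCU iterator of this kernel generation (the struct and names are illustrative; as the comment notes, the real patch takes a socket reference on the result to pin its lifetime):

	#include <linux/jhash.h>
	#include <linux/rculist.h>
	#include <linux/types.h>

	#define BIND_BUCKETS 1024
	static struct hlist_head table[BIND_BUCKETS];

	struct entry {
		struct hlist_node node;
		u32 addr;
		u16 port;
	};

	static struct hlist_head *bucket(u32 addr, u16 port)
	{
		/* Masking works because BIND_BUCKETS is a power of two. */
		return table + (jhash_2words(addr, port, 0) & (BIND_BUCKETS - 1));
	}

	/* Caller must hold rcu_read_lock() and arrange the entry's
	 * lifetime before dropping it (e.g. take a reference). */
	static struct entry *lookup(u32 addr, u16 port)
	{
		struct entry *e;
		struct hlist_node *pos;

		hlist_for_each_entry_rcu(e, pos, bucket(addr, port), node)
			if (e->addr == addr && e->port == port)
				return e;
		return NULL;
	}
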
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 0871a29f0780..75ea686f27d5 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -141,7 +141,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
141 unsigned long flags; 141 unsigned long flags;
142 142
143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); 143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
144 if (map == NULL) 144 if (!map)
145 return NULL; 145 return NULL;
146 146
147 map->m_addr = addr; 147 map->m_addr = addr;
@@ -159,7 +159,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
159 ret = rds_cong_tree_walk(addr, map); 159 ret = rds_cong_tree_walk(addr, map);
160 spin_unlock_irqrestore(&rds_cong_lock, flags); 160 spin_unlock_irqrestore(&rds_cong_lock, flags);
161 161
162 if (ret == NULL) { 162 if (!ret) {
163 ret = map; 163 ret = map;
164 map = NULL; 164 map = NULL;
165 } 165 }
@@ -205,7 +205,7 @@ int rds_cong_get_maps(struct rds_connection *conn)
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr); 205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr); 206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207 207
208 if (conn->c_lcong == NULL || conn->c_fcong == NULL) 208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM; 209 return -ENOMEM;
210 210
211 return 0; 211 return 0;
@@ -221,7 +221,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 if (!test_and_set_bit(0, &conn->c_map_queued)) {
223 rds_stats_inc(s_cong_update_queued); 223 rds_stats_inc(s_cong_update_queued);
224 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 224 rds_send_xmit(conn);
225 } 225 }
226 } 226 }
227 227
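
The congestion path now calls rds_send_xmit() directly rather than bouncing every map update through the workqueue; the test_and_set_bit() on c_map_queued still collapses duplicate requests. The dedupe idiom, sketched with hypothetical names:

	#include <linux/bitops.h>

	static void push_update(void);	/* hypothetical worker */

	/* Only the caller that flips bit 0 from 0 to 1 does the work;
	 * concurrent callers see 1 and know an update is already pending. */
	static void maybe_push_update(unsigned long *queued)
	{
		if (!test_and_set_bit(0, queued))
			push_update();
	}
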
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7619b671ca28..9334d892366e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -37,7 +37,6 @@
37 37
38#include "rds.h" 38#include "rds.h"
39#include "loop.h" 39#include "loop.h"
40#include "rdma.h"
41 40
42#define RDS_CONNECTION_HASH_BITS 12 41#define RDS_CONNECTION_HASH_BITS 12
43#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
@@ -63,18 +62,7 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
63 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
64} while (0) 63} while (0)
65 64
66static inline int rds_conn_is_sending(struct rds_connection *conn) 65/* rcu read lock must be held or the connection spinlock */
67{
68 int ret = 0;
69
70 if (!mutex_trylock(&conn->c_send_lock))
71 ret = 1;
72 else
73 mutex_unlock(&conn->c_send_lock);
74
75 return ret;
76}
77
78static struct rds_connection *rds_conn_lookup(struct hlist_head *head, 66static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
79 __be32 laddr, __be32 faddr, 67 __be32 laddr, __be32 faddr,
80 struct rds_transport *trans) 68 struct rds_transport *trans)
@@ -82,7 +70,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
82 struct rds_connection *conn, *ret = NULL; 70 struct rds_connection *conn, *ret = NULL;
83 struct hlist_node *pos; 71 struct hlist_node *pos;
84 72
85 hlist_for_each_entry(conn, pos, head, c_hash_node) { 73 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
86 if (conn->c_faddr == faddr && conn->c_laddr == laddr && 74 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
87 conn->c_trans == trans) { 75 conn->c_trans == trans) {
88 ret = conn; 76 ret = conn;
@@ -100,7 +88,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
100 * and receiving over this connection again in the future. It is up to 88 * and receiving over this connection again in the future. It is up to
101 * the transport to have serialized this call with its send and recv. 89 * the transport to have serialized this call with its send and recv.
102 */ 90 */
103void rds_conn_reset(struct rds_connection *conn) 91static void rds_conn_reset(struct rds_connection *conn)
104{ 92{
105 rdsdebug("connection %pI4 to %pI4 reset\n", 93 rdsdebug("connection %pI4 to %pI4 reset\n",
106 &conn->c_laddr, &conn->c_faddr); 94 &conn->c_laddr, &conn->c_faddr);
@@ -129,10 +117,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
129{ 117{
130 struct rds_connection *conn, *parent = NULL; 118 struct rds_connection *conn, *parent = NULL;
131 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 119 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
120 struct rds_transport *loop_trans;
132 unsigned long flags; 121 unsigned long flags;
133 int ret; 122 int ret;
134 123
135 spin_lock_irqsave(&rds_conn_lock, flags); 124 rcu_read_lock();
136 conn = rds_conn_lookup(head, laddr, faddr, trans); 125 conn = rds_conn_lookup(head, laddr, faddr, trans);
137 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && 126 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
138 !is_outgoing) { 127 !is_outgoing) {
@@ -143,12 +132,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
143 parent = conn; 132 parent = conn;
144 conn = parent->c_passive; 133 conn = parent->c_passive;
145 } 134 }
146 spin_unlock_irqrestore(&rds_conn_lock, flags); 135 rcu_read_unlock();
147 if (conn) 136 if (conn)
148 goto out; 137 goto out;
149 138
150 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 139 conn = kmem_cache_zalloc(rds_conn_slab, gfp);
151 if (conn == NULL) { 140 if (!conn) {
152 conn = ERR_PTR(-ENOMEM); 141 conn = ERR_PTR(-ENOMEM);
153 goto out; 142 goto out;
154 } 143 }
@@ -159,7 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
159 spin_lock_init(&conn->c_lock); 148 spin_lock_init(&conn->c_lock);
160 conn->c_next_tx_seq = 1; 149 conn->c_next_tx_seq = 1;
161 150
162 mutex_init(&conn->c_send_lock); 151 init_waitqueue_head(&conn->c_waitq);
163 INIT_LIST_HEAD(&conn->c_send_queue); 152 INIT_LIST_HEAD(&conn->c_send_queue);
164 INIT_LIST_HEAD(&conn->c_retrans); 153 INIT_LIST_HEAD(&conn->c_retrans);
165 154
@@ -175,7 +164,9 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
175 * can bind to the destination address then we'd rather the messages 164 * can bind to the destination address then we'd rather the messages
176 * flow through loopback rather than either transport. 165 * flow through loopback rather than either transport.
177 */ 166 */
178 if (rds_trans_get_preferred(faddr)) { 167 loop_trans = rds_trans_get_preferred(faddr);
168 if (loop_trans) {
169 rds_trans_put(loop_trans);
179 conn->c_loopback = 1; 170 conn->c_loopback = 1;
180 if (is_outgoing && trans->t_prefer_loopback) { 171 if (is_outgoing && trans->t_prefer_loopback) {
181 /* "outgoing" connection - and the transport 172 /* "outgoing" connection - and the transport
@@ -238,7 +229,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
238 kmem_cache_free(rds_conn_slab, conn); 229 kmem_cache_free(rds_conn_slab, conn);
239 conn = found; 230 conn = found;
240 } else { 231 } else {
241 hlist_add_head(&conn->c_hash_node, head); 232 hlist_add_head_rcu(&conn->c_hash_node, head);
242 rds_cong_add_conn(conn); 233 rds_cong_add_conn(conn);
243 rds_conn_count++; 234 rds_conn_count++;
244 } 235 }
@@ -263,21 +254,91 @@ struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
263} 254}
264EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 255EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
265 256
257void rds_conn_shutdown(struct rds_connection *conn)
258{
259 /* shut it down unless it's down already */
260 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
261 /*
262 * Quiesce the connection mgmt handlers before we start tearing
263 * things down. We don't hold the mutex for the entire
 264 * duration of the shutdown operation, else we may
 265 * deadlock with the CM handler. Instead, the CM event
 266 * handler is supposed to check for state DISCONNECTING.
267 */
268 mutex_lock(&conn->c_cm_lock);
269 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
270 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
271 rds_conn_error(conn, "shutdown called in state %d\n",
272 atomic_read(&conn->c_state));
273 mutex_unlock(&conn->c_cm_lock);
274 return;
275 }
276 mutex_unlock(&conn->c_cm_lock);
277
278 wait_event(conn->c_waitq,
279 !test_bit(RDS_IN_XMIT, &conn->c_flags));
280
281 conn->c_trans->conn_shutdown(conn);
282 rds_conn_reset(conn);
283
284 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
 285 /* This can happen - e.g. when we're in the middle of tearing
 286 * down the connection, and someone unloads the rds module.
 287 * Quite reproducible with loopback connections.
288 * Mostly harmless.
289 */
290 rds_conn_error(conn,
291 "%s: failed to transition to state DOWN, "
292 "current state is %d\n",
293 __func__,
294 atomic_read(&conn->c_state));
295 return;
296 }
297 }
298
299 /* Then reconnect if it's still live.
300 * The passive side of an IB loopback connection is never added
301 * to the conn hash, so we never trigger a reconnect on this
302 * conn - the reconnect is always triggered by the active peer. */
303 cancel_delayed_work_sync(&conn->c_conn_w);
304 rcu_read_lock();
305 if (!hlist_unhashed(&conn->c_hash_node)) {
306 rcu_read_unlock();
307 rds_queue_reconnect(conn);
308 } else {
309 rcu_read_unlock();
310 }
311}
312
313/*
314 * Stop and free a connection.
315 *
316 * This can only be used in very limited circumstances. It assumes that once
317 * the conn has been shutdown that no one else is referencing the connection.
318 * We can only ensure this in the rmmod path in the current code.
319 */
266void rds_conn_destroy(struct rds_connection *conn) 320void rds_conn_destroy(struct rds_connection *conn)
267{ 321{
268 struct rds_message *rm, *rtmp; 322 struct rds_message *rm, *rtmp;
323 unsigned long flags;
269 324
270 rdsdebug("freeing conn %p for %pI4 -> " 325 rdsdebug("freeing conn %p for %pI4 -> "
271 "%pI4\n", conn, &conn->c_laddr, 326 "%pI4\n", conn, &conn->c_laddr,
272 &conn->c_faddr); 327 &conn->c_faddr);
273 328
274 hlist_del_init(&conn->c_hash_node); 329 /* Ensure conn will not be scheduled for reconnect */
330 spin_lock_irq(&rds_conn_lock);
331 hlist_del_init_rcu(&conn->c_hash_node);
332 spin_unlock_irq(&rds_conn_lock);
333 synchronize_rcu();
275 334
276 /* wait for the rds thread to shut it down */ 335 /* shut the connection down */
277 atomic_set(&conn->c_state, RDS_CONN_ERROR); 336 rds_conn_drop(conn);
278 cancel_delayed_work(&conn->c_conn_w); 337 flush_work(&conn->c_down_w);
279 queue_work(rds_wq, &conn->c_down_w); 338
280 flush_workqueue(rds_wq); 339 /* make sure lingering queued work won't try to ref the conn */
340 cancel_delayed_work_sync(&conn->c_send_w);
341 cancel_delayed_work_sync(&conn->c_recv_w);
281 342
282 /* tear down queued messages */ 343 /* tear down queued messages */
283 list_for_each_entry_safe(rm, rtmp, 344 list_for_each_entry_safe(rm, rtmp,
@@ -302,7 +363,9 @@ void rds_conn_destroy(struct rds_connection *conn)
302 BUG_ON(!list_empty(&conn->c_retrans)); 363 BUG_ON(!list_empty(&conn->c_retrans));
303 kmem_cache_free(rds_conn_slab, conn); 364 kmem_cache_free(rds_conn_slab, conn);
304 365
366 spin_lock_irqsave(&rds_conn_lock, flags);
305 rds_conn_count--; 367 rds_conn_count--;
368 spin_unlock_irqrestore(&rds_conn_lock, flags);
306} 369}
307EXPORT_SYMBOL_GPL(rds_conn_destroy); 370EXPORT_SYMBOL_GPL(rds_conn_destroy);
308 371
@@ -316,23 +379,23 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
316 struct list_head *list; 379 struct list_head *list;
317 struct rds_connection *conn; 380 struct rds_connection *conn;
318 struct rds_message *rm; 381 struct rds_message *rm;
319 unsigned long flags;
320 unsigned int total = 0; 382 unsigned int total = 0;
383 unsigned long flags;
321 size_t i; 384 size_t i;
322 385
323 len /= sizeof(struct rds_info_message); 386 len /= sizeof(struct rds_info_message);
324 387
325 spin_lock_irqsave(&rds_conn_lock, flags); 388 rcu_read_lock();
326 389
327 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
328 i++, head++) { 391 i++, head++) {
329 hlist_for_each_entry(conn, pos, head, c_hash_node) { 392 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
330 if (want_send) 393 if (want_send)
331 list = &conn->c_send_queue; 394 list = &conn->c_send_queue;
332 else 395 else
333 list = &conn->c_retrans; 396 list = &conn->c_retrans;
334 397
335 spin_lock(&conn->c_lock); 398 spin_lock_irqsave(&conn->c_lock, flags);
336 399
337 /* XXX too lazy to maintain counts.. */ 400 /* XXX too lazy to maintain counts.. */
338 list_for_each_entry(rm, list, m_conn_item) { 401 list_for_each_entry(rm, list, m_conn_item) {
@@ -343,11 +406,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
343 conn->c_faddr, 0); 406 conn->c_faddr, 0);
344 } 407 }
345 408
346 spin_unlock(&conn->c_lock); 409 spin_unlock_irqrestore(&conn->c_lock, flags);
347 } 410 }
348 } 411 }
349 412 rcu_read_unlock();
350 spin_unlock_irqrestore(&rds_conn_lock, flags);
351 413
352 lens->nr = total; 414 lens->nr = total;
353 lens->each = sizeof(struct rds_info_message); 415 lens->each = sizeof(struct rds_info_message);
@@ -377,19 +439,17 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
377 uint64_t buffer[(item_len + 7) / 8]; 439 uint64_t buffer[(item_len + 7) / 8];
378 struct hlist_head *head; 440 struct hlist_head *head;
379 struct hlist_node *pos; 441 struct hlist_node *pos;
380 struct hlist_node *tmp;
381 struct rds_connection *conn; 442 struct rds_connection *conn;
382 unsigned long flags;
383 size_t i; 443 size_t i;
384 444
385 spin_lock_irqsave(&rds_conn_lock, flags); 445 rcu_read_lock();
386 446
387 lens->nr = 0; 447 lens->nr = 0;
388 lens->each = item_len; 448 lens->each = item_len;
389 449
390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 450 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
391 i++, head++) { 451 i++, head++) {
392 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { 452 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
393 453
394 /* XXX no c_lock usage.. */ 454 /* XXX no c_lock usage.. */
395 if (!visitor(conn, buffer)) 455 if (!visitor(conn, buffer))
@@ -405,8 +465,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
405 lens->nr++; 465 lens->nr++;
406 } 466 }
407 } 467 }
408 468 rcu_read_unlock();
409 spin_unlock_irqrestore(&rds_conn_lock, flags);
410} 469}
411EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 470EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
412 471
@@ -423,8 +482,8 @@ static int rds_conn_info_visitor(struct rds_connection *conn,
423 sizeof(cinfo->transport)); 482 sizeof(cinfo->transport));
424 cinfo->flags = 0; 483 cinfo->flags = 0;
425 484
426 rds_conn_info_set(cinfo->flags, 485 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
427 rds_conn_is_sending(conn), SENDING); 486 SENDING);
428 /* XXX Future: return the state rather than these funky bits */ 487 /* XXX Future: return the state rather than these funky bits */
429 rds_conn_info_set(cinfo->flags, 488 rds_conn_info_set(cinfo->flags,
430 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, 489 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
@@ -444,12 +503,12 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
444 sizeof(struct rds_info_connection)); 503 sizeof(struct rds_info_connection));
445} 504}
446 505
447int __init rds_conn_init(void) 506int rds_conn_init(void)
448{ 507{
449 rds_conn_slab = kmem_cache_create("rds_connection", 508 rds_conn_slab = kmem_cache_create("rds_connection",
450 sizeof(struct rds_connection), 509 sizeof(struct rds_connection),
451 0, 0, NULL); 510 0, 0, NULL);
452 if (rds_conn_slab == NULL) 511 if (!rds_conn_slab)
453 return -ENOMEM; 512 return -ENOMEM;
454 513
455 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 514 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
@@ -487,6 +546,18 @@ void rds_conn_drop(struct rds_connection *conn)
487EXPORT_SYMBOL_GPL(rds_conn_drop); 546EXPORT_SYMBOL_GPL(rds_conn_drop);
488 547
489/* 548/*
549 * If the connection is down, trigger a connect. We may have scheduled a
550 * delayed reconnect however - in this case we should not interfere.
551 */
552void rds_conn_connect_if_down(struct rds_connection *conn)
553{
554 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
555 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
556 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
557}
558EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
559
560/*
490 * An error occurred on the connection 561 * An error occurred on the connection
491 */ 562 */
492void 563void
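
connection.c drops the c_send_lock mutex probe in favor of an RDS_IN_XMIT bit plus a waitqueue: the transmit path owns the bit while sending, and shutdown sleeps until it clears. A sketch of that flag/waitqueue pairing under those assumptions (names simplified; the barrier macro is the one this kernel generation provides):

	#include <linux/bitops.h>
	#include <linux/wait.h>

	#define IN_XMIT 0
	static unsigned long conn_flags;
	static DECLARE_WAIT_QUEUE_HEAD(conn_waitq);

	static int try_xmit(void)
	{
		if (test_and_set_bit(IN_XMIT, &conn_flags))
			return -EBUSY;		/* another CPU is transmitting */
		/* ... push queued messages to the transport ... */
		clear_bit(IN_XMIT, &conn_flags);
		smp_mb__after_clear_bit();	/* order the clear before the wakeup */
		wake_up_all(&conn_waitq);
		return 0;
	}

	static void wait_until_quiet(void)
	{
		wait_event(conn_waitq, !test_bit(IN_XMIT, &conn_flags));
	}
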
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 8f2d6dd7700a..4123967d4d65 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -42,7 +42,7 @@
42#include "rds.h" 42#include "rds.h"
43#include "ib.h" 43#include "ib.h"
44 44
45unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE; 45static unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE;
46unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */ 46unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */
47unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 47unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
48 48
@@ -53,13 +53,72 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
53module_param(rds_ib_retry_count, int, 0444); 53module_param(rds_ib_retry_count, int, 0444);
54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
55 55
56/*
 57 * we have a clumsy combination of RCU and an rwsem protecting this list
58 * because it is used both in the get_mr fast path and while blocking in
59 * the FMR flushing path.
60 */
61DECLARE_RWSEM(rds_ib_devices_lock);
56struct list_head rds_ib_devices; 62struct list_head rds_ib_devices;
57 63
58/* NOTE: if also grabbing ibdev lock, grab this first */ 64/* NOTE: if also grabbing ibdev lock, grab this first */
59DEFINE_SPINLOCK(ib_nodev_conns_lock); 65DEFINE_SPINLOCK(ib_nodev_conns_lock);
60LIST_HEAD(ib_nodev_conns); 66LIST_HEAD(ib_nodev_conns);
61 67
62void rds_ib_add_one(struct ib_device *device) 68static void rds_ib_nodev_connect(void)
69{
70 struct rds_ib_connection *ic;
71
72 spin_lock(&ib_nodev_conns_lock);
73 list_for_each_entry(ic, &ib_nodev_conns, ib_node)
74 rds_conn_connect_if_down(ic->conn);
75 spin_unlock(&ib_nodev_conns_lock);
76}
77
78static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev)
79{
80 struct rds_ib_connection *ic;
81 unsigned long flags;
82
83 spin_lock_irqsave(&rds_ibdev->spinlock, flags);
84 list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
85 rds_conn_drop(ic->conn);
86 spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
87}
88
89/*
 90 * rds_ib_destroy_mr_pool() blocks on a few things and MRs drop references
 91 * from interrupt context, so we push freeing off into a work struct in krdsd.
92 */
93static void rds_ib_dev_free(struct work_struct *work)
94{
95 struct rds_ib_ipaddr *i_ipaddr, *i_next;
96 struct rds_ib_device *rds_ibdev = container_of(work,
97 struct rds_ib_device, free_work);
98
99 if (rds_ibdev->mr_pool)
100 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
101 if (rds_ibdev->mr)
102 ib_dereg_mr(rds_ibdev->mr);
103 if (rds_ibdev->pd)
104 ib_dealloc_pd(rds_ibdev->pd);
105
106 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
107 list_del(&i_ipaddr->list);
108 kfree(i_ipaddr);
109 }
110
111 kfree(rds_ibdev);
112}
113
114void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
115{
116 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0);
117 if (atomic_dec_and_test(&rds_ibdev->refcount))
118 queue_work(rds_wq, &rds_ibdev->free_work);
119}
120
121static void rds_ib_add_one(struct ib_device *device)
63{ 122{
64 struct rds_ib_device *rds_ibdev; 123 struct rds_ib_device *rds_ibdev;
65 struct ib_device_attr *dev_attr; 124 struct ib_device_attr *dev_attr;
@@ -77,11 +136,14 @@ void rds_ib_add_one(struct ib_device *device)
77 goto free_attr; 136 goto free_attr;
78 } 137 }
79 138
80 rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL); 139 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
140 ibdev_to_node(device));
81 if (!rds_ibdev) 141 if (!rds_ibdev)
82 goto free_attr; 142 goto free_attr;
83 143
84 spin_lock_init(&rds_ibdev->spinlock); 144 spin_lock_init(&rds_ibdev->spinlock);
145 atomic_set(&rds_ibdev->refcount, 1);
146 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
85 147
86 rds_ibdev->max_wrs = dev_attr->max_qp_wr; 148 rds_ibdev->max_wrs = dev_attr->max_qp_wr;
87 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); 149 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
@@ -91,68 +153,107 @@ void rds_ib_add_one(struct ib_device *device)
91 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : 153 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
92 fmr_pool_size; 154 fmr_pool_size;
93 155
156 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
157 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
158
94 rds_ibdev->dev = device; 159 rds_ibdev->dev = device;
95 rds_ibdev->pd = ib_alloc_pd(device); 160 rds_ibdev->pd = ib_alloc_pd(device);
96 if (IS_ERR(rds_ibdev->pd)) 161 if (IS_ERR(rds_ibdev->pd)) {
97 goto free_dev; 162 rds_ibdev->pd = NULL;
163 goto put_dev;
164 }
98 165
99 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, 166 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
100 IB_ACCESS_LOCAL_WRITE); 167 if (IS_ERR(rds_ibdev->mr)) {
101 if (IS_ERR(rds_ibdev->mr)) 168 rds_ibdev->mr = NULL;
102 goto err_pd; 169 goto put_dev;
170 }
103 171
104 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); 172 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
105 if (IS_ERR(rds_ibdev->mr_pool)) { 173 if (IS_ERR(rds_ibdev->mr_pool)) {
106 rds_ibdev->mr_pool = NULL; 174 rds_ibdev->mr_pool = NULL;
107 goto err_mr; 175 goto put_dev;
108 } 176 }
109 177
110 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 178 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
111 INIT_LIST_HEAD(&rds_ibdev->conn_list); 179 INIT_LIST_HEAD(&rds_ibdev->conn_list);
112 list_add_tail(&rds_ibdev->list, &rds_ib_devices); 180
181 down_write(&rds_ib_devices_lock);
182 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
183 up_write(&rds_ib_devices_lock);
184 atomic_inc(&rds_ibdev->refcount);
113 185
114 ib_set_client_data(device, &rds_ib_client, rds_ibdev); 186 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
187 atomic_inc(&rds_ibdev->refcount);
115 188
116 goto free_attr; 189 rds_ib_nodev_connect();
117 190
118err_mr: 191put_dev:
119 ib_dereg_mr(rds_ibdev->mr); 192 rds_ib_dev_put(rds_ibdev);
120err_pd:
121 ib_dealloc_pd(rds_ibdev->pd);
122free_dev:
123 kfree(rds_ibdev);
124free_attr: 193free_attr:
125 kfree(dev_attr); 194 kfree(dev_attr);
126} 195}
127 196
128void rds_ib_remove_one(struct ib_device *device) 197/*
198 * New connections use this to find the device to associate with the
199 * connection. It's not in the fast path so we're not concerned about the
 200 * performance of the IB call. (As of this writing, it uses an
 201 * interrupt-blocking spinlock to serialize walking a per-device list of all registered
202 * clients.)
203 *
204 * RCU is used to handle incoming connections racing with device teardown.
205 * Rather than use a lock to serialize removal from the client_data and
206 * getting a new reference, we use an RCU grace period. The destruction
207 * path removes the device from client_data and then waits for all RCU
208 * readers to finish.
209 *
 210 * A new connection can get NULL from this if it's arriving on a
211 * device that is in the process of being removed.
212 */
213struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
129{ 214{
130 struct rds_ib_device *rds_ibdev; 215 struct rds_ib_device *rds_ibdev;
131 struct rds_ib_ipaddr *i_ipaddr, *i_next;
132 216
217 rcu_read_lock();
133 rds_ibdev = ib_get_client_data(device, &rds_ib_client); 218 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
134 if (!rds_ibdev) 219 if (rds_ibdev)
135 return; 220 atomic_inc(&rds_ibdev->refcount);
221 rcu_read_unlock();
222 return rds_ibdev;
223}
136 224
137 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 225/*
138 list_del(&i_ipaddr->list); 226 * The IB stack is letting us know that a device is going away. This can
139 kfree(i_ipaddr); 227 * happen if the underlying HCA driver is removed or if PCI hotplug is removing
140 } 228 * the PCI function, for example.
229 *
 230 * This can be called at any time and can race with any other RDS path.
231 */
232static void rds_ib_remove_one(struct ib_device *device)
233{
234 struct rds_ib_device *rds_ibdev;
141 235
142 rds_ib_destroy_conns(rds_ibdev); 236 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
237 if (!rds_ibdev)
238 return;
143 239
144 if (rds_ibdev->mr_pool) 240 rds_ib_dev_shutdown(rds_ibdev);
145 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
146 241
147 ib_dereg_mr(rds_ibdev->mr); 242 /* stop connection attempts from getting a reference to this device. */
243 ib_set_client_data(device, &rds_ib_client, NULL);
148 244
149 while (ib_dealloc_pd(rds_ibdev->pd)) { 245 down_write(&rds_ib_devices_lock);
150 rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd); 246 list_del_rcu(&rds_ibdev->list);
151 msleep(1); 247 up_write(&rds_ib_devices_lock);
152 }
153 248
154 list_del(&rds_ibdev->list); 249 /*
155 kfree(rds_ibdev); 250 * This synchronize rcu is waiting for readers of both the ib
251 * client data and the devices list to finish before we drop
252 * both of those references.
253 */
254 synchronize_rcu();
255 rds_ib_dev_put(rds_ibdev);
256 rds_ib_dev_put(rds_ibdev);
156} 257}
157 258
158struct ib_client rds_ib_client = { 259struct ib_client rds_ib_client = {
@@ -186,7 +287,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
186 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 287 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
187 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 288 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
188 289
189 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 290 rds_ibdev = ic->rds_ibdev;
190 iinfo->max_send_wr = ic->i_send_ring.w_nr; 291 iinfo->max_send_wr = ic->i_send_ring.w_nr;
191 iinfo->max_recv_wr = ic->i_recv_ring.w_nr; 292 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
192 iinfo->max_send_sge = rds_ibdev->max_sge; 293 iinfo->max_send_sge = rds_ibdev->max_sge;
@@ -248,29 +349,36 @@ static int rds_ib_laddr_check(__be32 addr)
248 return ret; 349 return ret;
249} 350}
250 351
352static void rds_ib_unregister_client(void)
353{
354 ib_unregister_client(&rds_ib_client);
355 /* wait for rds_ib_dev_free() to complete */
356 flush_workqueue(rds_wq);
357}
358
251void rds_ib_exit(void) 359void rds_ib_exit(void)
252{ 360{
253 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 361 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
362 rds_ib_unregister_client();
254 rds_ib_destroy_nodev_conns(); 363 rds_ib_destroy_nodev_conns();
255 ib_unregister_client(&rds_ib_client);
256 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
257 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
258 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
259} 368}
260 369
261struct rds_transport rds_ib_transport = { 370struct rds_transport rds_ib_transport = {
262 .laddr_check = rds_ib_laddr_check, 371 .laddr_check = rds_ib_laddr_check,
263 .xmit_complete = rds_ib_xmit_complete, 372 .xmit_complete = rds_ib_xmit_complete,
264 .xmit = rds_ib_xmit, 373 .xmit = rds_ib_xmit,
265 .xmit_cong_map = NULL,
266 .xmit_rdma = rds_ib_xmit_rdma, 374 .xmit_rdma = rds_ib_xmit_rdma,
375 .xmit_atomic = rds_ib_xmit_atomic,
267 .recv = rds_ib_recv, 376 .recv = rds_ib_recv,
268 .conn_alloc = rds_ib_conn_alloc, 377 .conn_alloc = rds_ib_conn_alloc,
269 .conn_free = rds_ib_conn_free, 378 .conn_free = rds_ib_conn_free,
270 .conn_connect = rds_ib_conn_connect, 379 .conn_connect = rds_ib_conn_connect,
271 .conn_shutdown = rds_ib_conn_shutdown, 380 .conn_shutdown = rds_ib_conn_shutdown,
272 .inc_copy_to_user = rds_ib_inc_copy_to_user, 381 .inc_copy_to_user = rds_ib_inc_copy_to_user,
273 .inc_purge = rds_ib_inc_purge,
274 .inc_free = rds_ib_inc_free, 382 .inc_free = rds_ib_inc_free,
275 .cm_initiate_connect = rds_ib_cm_initiate_connect, 383 .cm_initiate_connect = rds_ib_cm_initiate_connect,
276 .cm_handle_connect = rds_ib_cm_handle_connect, 384 .cm_handle_connect = rds_ib_cm_handle_connect,
@@ -286,16 +394,20 @@ struct rds_transport rds_ib_transport = {
286 .t_type = RDS_TRANS_IB 394 .t_type = RDS_TRANS_IB
287}; 395};
288 396
289int __init rds_ib_init(void) 397int rds_ib_init(void)
290{ 398{
291 int ret; 399 int ret;
292 400
293 INIT_LIST_HEAD(&rds_ib_devices); 401 INIT_LIST_HEAD(&rds_ib_devices);
294 402
295 ret = ib_register_client(&rds_ib_client); 403 ret = rds_ib_fmr_init();
296 if (ret) 404 if (ret)
297 goto out; 405 goto out;
298 406
407 ret = ib_register_client(&rds_ib_client);
408 if (ret)
409 goto out_fmr_exit;
410
299 ret = rds_ib_sysctl_init(); 411 ret = rds_ib_sysctl_init();
300 if (ret) 412 if (ret)
301 goto out_ibreg; 413 goto out_ibreg;
@@ -317,7 +429,9 @@ out_recv:
317out_sysctl: 429out_sysctl:
318 rds_ib_sysctl_exit(); 430 rds_ib_sysctl_exit();
319out_ibreg: 431out_ibreg:
320 ib_unregister_client(&rds_ib_client); 432 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
321out: 435out:
322 return ret; 436 return ret;
323} 437}
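
ib.c makes device lifetime reference-counted, and because final teardown blocks (destroying the FMR pool) while puts can arrive from interrupt context, the last put only queues a work item. The put-side shape, sketched (struct and names are illustrative):

	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct dev_state {
		atomic_t refcount;
		struct work_struct free_work;
	};

	static void dev_state_free(struct work_struct *work)
	{
		struct dev_state *d = container_of(work, struct dev_state,
						   free_work);

		/* blocking teardown is safe here: we run in process context */
		kfree(d);
	}

	static void dev_state_put(struct dev_state *d)
	{
		/* may be called from IRQ context, so never free inline */
		if (atomic_dec_and_test(&d->refcount))
			schedule_work(&d->free_work);
	}
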
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 64df4e79b29f..e34ad032b66d 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -3,11 +3,13 @@
3 3
4#include <rdma/ib_verbs.h> 4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h> 5#include <rdma/rdma_cm.h>
6#include <linux/pci.h>
7#include <linux/slab.h>
6#include "rds.h" 8#include "rds.h"
7#include "rdma_transport.h" 9#include "rdma_transport.h"
8 10
9#define RDS_FMR_SIZE 256 11#define RDS_FMR_SIZE 256
10#define RDS_FMR_POOL_SIZE 4096 12#define RDS_FMR_POOL_SIZE 8192
11 13
12#define RDS_IB_MAX_SGE 8 14#define RDS_IB_MAX_SGE 8
13#define RDS_IB_RECV_SGE 2 15#define RDS_IB_RECV_SGE 2
@@ -19,6 +21,9 @@
19 21
20#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ 22#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
21 23
24#define RDS_IB_RECYCLE_BATCH_COUNT 32
25
26extern struct rw_semaphore rds_ib_devices_lock;
22extern struct list_head rds_ib_devices; 27extern struct list_head rds_ib_devices;
23 28
24/* 29/*
@@ -26,20 +31,29 @@ extern struct list_head rds_ib_devices;
26 * try and minimize the amount of memory tied up both the device and 31 * try and minimize the amount of memory tied up both the device and
27 * socket receive queues. 32 * socket receive queues.
28 */ 33 */
29/* page offset of the final full frag that fits in the page */
30#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
31struct rds_page_frag { 34struct rds_page_frag {
32 struct list_head f_item; 35 struct list_head f_item;
33 struct page *f_page; 36 struct list_head f_cache_entry;
34 unsigned long f_offset; 37 struct scatterlist f_sg;
35 dma_addr_t f_mapped;
36}; 38};
37 39
38struct rds_ib_incoming { 40struct rds_ib_incoming {
39 struct list_head ii_frags; 41 struct list_head ii_frags;
42 struct list_head ii_cache_entry;
40 struct rds_incoming ii_inc; 43 struct rds_incoming ii_inc;
41}; 44};
42 45
46struct rds_ib_cache_head {
47 struct list_head *first;
48 unsigned long count;
49};
50
51struct rds_ib_refill_cache {
52 struct rds_ib_cache_head *percpu;
53 struct list_head *xfer;
54 struct list_head *ready;
55};
56
43struct rds_ib_connect_private { 57struct rds_ib_connect_private {
44 /* Add new fields at the end, and don't permute existing fields. */ 58 /* Add new fields at the end, and don't permute existing fields. */
45 __be32 dp_saddr; 59 __be32 dp_saddr;
@@ -53,8 +67,7 @@ struct rds_ib_connect_private {
53}; 67};
54 68
55struct rds_ib_send_work { 69struct rds_ib_send_work {
56 struct rds_message *s_rm; 70 void *s_op;
57 struct rds_rdma_op *s_op;
58 struct ib_send_wr s_wr; 71 struct ib_send_wr s_wr;
59 struct ib_sge s_sge[RDS_IB_MAX_SGE]; 72 struct ib_sge s_sge[RDS_IB_MAX_SGE];
60 unsigned long s_queued; 73 unsigned long s_queued;
@@ -92,10 +105,11 @@ struct rds_ib_connection {
92 105
93 /* tx */ 106 /* tx */
94 struct rds_ib_work_ring i_send_ring; 107 struct rds_ib_work_ring i_send_ring;
95 struct rds_message *i_rm; 108 struct rm_data_op *i_data_op;
96 struct rds_header *i_send_hdrs; 109 struct rds_header *i_send_hdrs;
97 u64 i_send_hdrs_dma; 110 u64 i_send_hdrs_dma;
98 struct rds_ib_send_work *i_sends; 111 struct rds_ib_send_work *i_sends;
112 atomic_t i_signaled_sends;
99 113
100 /* rx */ 114 /* rx */
101 struct tasklet_struct i_recv_tasklet; 115 struct tasklet_struct i_recv_tasklet;
@@ -106,8 +120,9 @@ struct rds_ib_connection {
106 struct rds_header *i_recv_hdrs; 120 struct rds_header *i_recv_hdrs;
107 u64 i_recv_hdrs_dma; 121 u64 i_recv_hdrs_dma;
108 struct rds_ib_recv_work *i_recvs; 122 struct rds_ib_recv_work *i_recvs;
109 struct rds_page_frag i_frag;
110 u64 i_ack_recv; /* last ACK received */ 123 u64 i_ack_recv; /* last ACK received */
124 struct rds_ib_refill_cache i_cache_incs;
125 struct rds_ib_refill_cache i_cache_frags;
111 126
112 /* sending acks */ 127 /* sending acks */
113 unsigned long i_ack_flags; 128 unsigned long i_ack_flags;
@@ -138,7 +153,6 @@ struct rds_ib_connection {
138 153
139 /* Batched completions */ 154 /* Batched completions */
140 unsigned int i_unsignaled_wrs; 155 unsigned int i_unsignaled_wrs;
141 long i_unsignaled_bytes;
142}; 156};
143 157
144/* This assumes that atomic_t is at least 32 bits */ 158/* This assumes that atomic_t is at least 32 bits */
@@ -164,9 +178,17 @@ struct rds_ib_device {
164 unsigned int max_fmrs; 178 unsigned int max_fmrs;
165 int max_sge; 179 int max_sge;
166 unsigned int max_wrs; 180 unsigned int max_wrs;
181 unsigned int max_initiator_depth;
182 unsigned int max_responder_resources;
167 spinlock_t spinlock; /* protect the above */ 183 spinlock_t spinlock; /* protect the above */
184 atomic_t refcount;
185 struct work_struct free_work;
168}; 186};
169 187
188#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
189#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device))
190#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
191
170/* bits for i_ack_flags */ 192/* bits for i_ack_flags */
171#define IB_ACK_IN_FLIGHT 0 193#define IB_ACK_IN_FLIGHT 0
172#define IB_ACK_REQUESTED 1 194#define IB_ACK_REQUESTED 1
@@ -202,6 +224,8 @@ struct rds_ib_statistics {
202 uint64_t s_ib_rdma_mr_pool_flush; 224 uint64_t s_ib_rdma_mr_pool_flush;
203 uint64_t s_ib_rdma_mr_pool_wait; 225 uint64_t s_ib_rdma_mr_pool_wait;
204 uint64_t s_ib_rdma_mr_pool_depleted; 226 uint64_t s_ib_rdma_mr_pool_depleted;
227 uint64_t s_ib_atomic_cswp;
228 uint64_t s_ib_atomic_fadd;
205}; 229};
206 230
207extern struct workqueue_struct *rds_ib_wq; 231extern struct workqueue_struct *rds_ib_wq;
@@ -241,11 +265,10 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
241 265
242/* ib.c */ 266/* ib.c */
243extern struct rds_transport rds_ib_transport; 267extern struct rds_transport rds_ib_transport;
244extern void rds_ib_add_one(struct ib_device *device); 268struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
245extern void rds_ib_remove_one(struct ib_device *device); 269void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
246extern struct ib_client rds_ib_client; 270extern struct ib_client rds_ib_client;
247 271
248extern unsigned int fmr_pool_size;
249extern unsigned int fmr_message_size; 272extern unsigned int fmr_message_size;
250extern unsigned int rds_ib_retry_count; 273extern unsigned int rds_ib_retry_count;
251 274
@@ -258,7 +281,7 @@ void rds_ib_conn_free(void *arg);
258int rds_ib_conn_connect(struct rds_connection *conn); 281int rds_ib_conn_connect(struct rds_connection *conn);
259void rds_ib_conn_shutdown(struct rds_connection *conn); 282void rds_ib_conn_shutdown(struct rds_connection *conn);
260void rds_ib_state_change(struct sock *sk); 283void rds_ib_state_change(struct sock *sk);
261int __init rds_ib_listen_init(void); 284int rds_ib_listen_init(void);
262void rds_ib_listen_stop(void); 285void rds_ib_listen_stop(void);
263void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); 286void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
264int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, 287int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -275,15 +298,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
275int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 298int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
276void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 299void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
277void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 300void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
278void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); 301void rds_ib_destroy_nodev_conns(void);
279static inline void rds_ib_destroy_nodev_conns(void)
280{
281 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
282}
283static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
284{
285 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
286}
287struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 302struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
288void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 303void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
289void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 304void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -292,14 +307,16 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
292void rds_ib_sync_mr(void *trans_private, int dir); 307void rds_ib_sync_mr(void *trans_private, int dir);
293void rds_ib_free_mr(void *trans_private, int invalidate); 308void rds_ib_free_mr(void *trans_private, int invalidate);
294void rds_ib_flush_mrs(void); 309void rds_ib_flush_mrs(void);
310int rds_ib_fmr_init(void);
311void rds_ib_fmr_exit(void);
295 312
296/* ib_recv.c */ 313/* ib_recv.c */
297int __init rds_ib_recv_init(void); 314int rds_ib_recv_init(void);
298void rds_ib_recv_exit(void); 315void rds_ib_recv_exit(void);
299int rds_ib_recv(struct rds_connection *conn); 316int rds_ib_recv(struct rds_connection *conn);
300int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 317int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
301 gfp_t page_gfp, int prefill); 318void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
302void rds_ib_inc_purge(struct rds_incoming *inc); 319void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
303void rds_ib_inc_free(struct rds_incoming *inc); 320void rds_ib_inc_free(struct rds_incoming *inc);
304int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 321int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
305 size_t size); 322 size_t size);
@@ -325,17 +342,19 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
325extern wait_queue_head_t rds_ib_ring_empty_wait; 342extern wait_queue_head_t rds_ib_ring_empty_wait;
326 343
327/* ib_send.c */ 344/* ib_send.c */
345char *rds_ib_wc_status_str(enum ib_wc_status status);
328void rds_ib_xmit_complete(struct rds_connection *conn); 346void rds_ib_xmit_complete(struct rds_connection *conn);
329int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, 347int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
330 unsigned int hdr_off, unsigned int sg, unsigned int off); 348 unsigned int hdr_off, unsigned int sg, unsigned int off);
331void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); 349void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
332void rds_ib_send_init_ring(struct rds_ib_connection *ic); 350void rds_ib_send_init_ring(struct rds_ib_connection *ic);
333void rds_ib_send_clear_ring(struct rds_ib_connection *ic); 351void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
334int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 352int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
335void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); 353void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
336void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); 354void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
337int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, 355int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
338 u32 *adv_credits, int need_posted, int max_posted); 356 u32 *adv_credits, int need_posted, int max_posted);
357int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
339 358
340/* ib_stats.c */ 359/* ib_stats.c */
341DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); 360DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
@@ -344,7 +363,7 @@ unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
344 unsigned int avail); 363 unsigned int avail);
345 364
346/* ib_sysctl.c */ 365/* ib_sysctl.c */
347int __init rds_ib_sysctl_init(void); 366int rds_ib_sysctl_init(void);
348void rds_ib_sysctl_exit(void); 367void rds_ib_sysctl_exit(void);
349extern unsigned long rds_ib_sysctl_max_send_wr; 368extern unsigned long rds_ib_sysctl_max_send_wr;
350extern unsigned long rds_ib_sysctl_max_recv_wr; 369extern unsigned long rds_ib_sysctl_max_recv_wr;
@@ -352,30 +371,5 @@ extern unsigned long rds_ib_sysctl_max_unsig_wrs;
352extern unsigned long rds_ib_sysctl_max_unsig_bytes; 371extern unsigned long rds_ib_sysctl_max_unsig_bytes;
353extern unsigned long rds_ib_sysctl_max_recv_allocation; 372extern unsigned long rds_ib_sysctl_max_recv_allocation;
354extern unsigned int rds_ib_sysctl_flow_control; 373extern unsigned int rds_ib_sysctl_flow_control;
355extern ctl_table rds_ib_sysctl_table[];
356
357/*
358 * Helper functions for getting/setting the header and data SGEs in
359 * RDS packets (not RDMA)
360 *
361 * From version 3.1 onwards, header is in front of data in the sge.
362 */
363static inline struct ib_sge *
364rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
365{
366 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
367 return &sge[0];
368 else
369 return &sge[1];
370}
371
372static inline struct ib_sge *
373rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
374{
375 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
376 return &sge[1];
377 else
378 return &sge[0];
379}
380 374
381#endif 375#endif
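
ib.h's new ibdev_to_node() chain (ib_device to pci_dev to bus to NUMA node) is what lets rds_ib_add_one() place the device struct with kzalloc_node() on the node nearest the HCA. The intent, sketched with a hypothetical helper:

	#include <linux/pci.h>
	#include <linux/slab.h>

	/* Keep per-device state NUMA-local to the HCA so the hot rx/tx
	 * paths touch memory on the node that takes the interrupts. */
	static void *alloc_near_device(struct pci_dev *pdev, size_t size)
	{
		return kzalloc_node(size, GFP_KERNEL, pcibus_to_node(pdev->bus));
	}
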
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f68832798db2..ee369d201a65 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -38,6 +38,36 @@
38#include "rds.h" 38#include "rds.h"
39#include "ib.h" 39#include "ib.h"
40 40
41static char *rds_ib_event_type_strings[] = {
42#define RDS_IB_EVENT_STRING(foo) \
43 [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
44 RDS_IB_EVENT_STRING(CQ_ERR),
45 RDS_IB_EVENT_STRING(QP_FATAL),
46 RDS_IB_EVENT_STRING(QP_REQ_ERR),
47 RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
48 RDS_IB_EVENT_STRING(COMM_EST),
49 RDS_IB_EVENT_STRING(SQ_DRAINED),
50 RDS_IB_EVENT_STRING(PATH_MIG),
51 RDS_IB_EVENT_STRING(PATH_MIG_ERR),
52 RDS_IB_EVENT_STRING(DEVICE_FATAL),
53 RDS_IB_EVENT_STRING(PORT_ACTIVE),
54 RDS_IB_EVENT_STRING(PORT_ERR),
55 RDS_IB_EVENT_STRING(LID_CHANGE),
56 RDS_IB_EVENT_STRING(PKEY_CHANGE),
57 RDS_IB_EVENT_STRING(SM_CHANGE),
58 RDS_IB_EVENT_STRING(SRQ_ERR),
59 RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
60 RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
61 RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
62#undef RDS_IB_EVENT_STRING
63};
64
65static char *rds_ib_event_str(enum ib_event_type type)
66{
67 return rds_str_array(rds_ib_event_type_strings,
68 ARRAY_SIZE(rds_ib_event_type_strings), type);
69};
70
41/* 71/*
42 * Set the selected protocol version 72 * Set the selected protocol version
43 */ 73 */
@@ -95,7 +125,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
95{ 125{
96 const struct rds_ib_connect_private *dp = NULL; 126 const struct rds_ib_connect_private *dp = NULL;
97 struct rds_ib_connection *ic = conn->c_transport_data; 127 struct rds_ib_connection *ic = conn->c_transport_data;
98 struct rds_ib_device *rds_ibdev;
99 struct ib_qp_attr qp_attr; 128 struct ib_qp_attr qp_attr;
100 int err; 129 int err;
101 130
@@ -111,11 +140,21 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
111 } 140 }
112 } 141 }
113 142
114 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", 143 if (conn->c_version < RDS_PROTOCOL(3,1)) {
115 &conn->c_faddr, 144 printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
116 RDS_PROTOCOL_MAJOR(conn->c_version), 145 " no longer supported\n",
117 RDS_PROTOCOL_MINOR(conn->c_version), 146 &conn->c_faddr,
118 ic->i_flowctl ? ", flow control" : ""); 147 RDS_PROTOCOL_MAJOR(conn->c_version),
148 RDS_PROTOCOL_MINOR(conn->c_version));
149 rds_conn_destroy(conn);
150 return;
151 } else {
152 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
153 &conn->c_faddr,
154 RDS_PROTOCOL_MAJOR(conn->c_version),
155 RDS_PROTOCOL_MINOR(conn->c_version),
156 ic->i_flowctl ? ", flow control" : "");
157 }
119 158
120 /* 159 /*
121 * Init rings and fill recv. this needs to wait until protocol negotiation 160 * Init rings and fill recv. this needs to wait until protocol negotiation
@@ -125,7 +164,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
125 rds_ib_recv_init_ring(ic); 164 rds_ib_recv_init_ring(ic);
126 /* Post receive buffers - as a side effect, this will update 165 /* Post receive buffers - as a side effect, this will update
127 * the posted credit count. */ 166 * the posted credit count. */
128 rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); 167 rds_ib_recv_refill(conn, 1);
129 168
130 /* Tune RNR behavior */ 169 /* Tune RNR behavior */
131 rds_ib_tune_rnr(ic, &qp_attr); 170 rds_ib_tune_rnr(ic, &qp_attr);
@@ -135,12 +174,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
135 if (err) 174 if (err)
136 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); 175 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
137 176
138 /* update ib_device with this local ipaddr & conn */ 177 /* update ib_device with this local ipaddr */
139 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 178 err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
140 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
141 if (err) 179 if (err)
142 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 180 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
143 rds_ib_add_conn(rds_ibdev, conn); 181 err);
144 182
145 /* If the peer gave us the last packet it saw, process this as if 183 /* If the peer gave us the last packet it saw, process this as if
146 * we had received a regular ACK. */ 184 * we had received a regular ACK. */
@@ -153,18 +191,23 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
153static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, 191static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
154 struct rdma_conn_param *conn_param, 192 struct rdma_conn_param *conn_param,
155 struct rds_ib_connect_private *dp, 193 struct rds_ib_connect_private *dp,
156 u32 protocol_version) 194 u32 protocol_version,
195 u32 max_responder_resources,
196 u32 max_initiator_depth)
157{ 197{
198 struct rds_ib_connection *ic = conn->c_transport_data;
199 struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
200
158 memset(conn_param, 0, sizeof(struct rdma_conn_param)); 201 memset(conn_param, 0, sizeof(struct rdma_conn_param));
159 /* XXX tune these? */ 202
160 conn_param->responder_resources = 1; 203 conn_param->responder_resources =
161 conn_param->initiator_depth = 1; 204 min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
205 conn_param->initiator_depth =
206 min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
162 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7); 207 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
163 conn_param->rnr_retry_count = 7; 208 conn_param->rnr_retry_count = 7;
164 209
165 if (dp) { 210 if (dp) {
166 struct rds_ib_connection *ic = conn->c_transport_data;
167
168 memset(dp, 0, sizeof(*dp)); 211 memset(dp, 0, sizeof(*dp));
169 dp->dp_saddr = conn->c_laddr; 212 dp->dp_saddr = conn->c_laddr;
170 dp->dp_daddr = conn->c_faddr; 213 dp->dp_daddr = conn->c_faddr;
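The min_t() pairs above are the heart of this change: each peer clamps the remote side's advertised RDMA credits against its own device limits, so the accepted connection never exceeds what either HCA supports. A minimal sketch of the clamping, with hypothetical local_max_* parameters standing in for the rds_ib_device caps:

#include <linux/kernel.h>
#include <rdma/rdma_cm.h>

/*
 * Sketch: clamp peer-advertised credits against local device caps.
 * The active (connecting) side has no peer values yet, so it passes
 * UINT_MAX and the min_t() collapses to the local limit.
 */
static void clamp_rdma_credits(struct rdma_conn_param *p,
                               u32 local_max_rr, u32 local_max_id,
                               u32 remote_rr, u32 remote_id)
{
        p->responder_resources = min_t(u32, local_max_rr, remote_rr);
        p->initiator_depth = min_t(u32, local_max_id, remote_id);
}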
@@ -189,7 +232,8 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
189 232
190static void rds_ib_cq_event_handler(struct ib_event *event, void *data) 233static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
191{ 234{
192 rdsdebug("event %u data %p\n", event->event, data); 235 rdsdebug("event %u (%s) data %p\n",
236 event->event, rds_ib_event_str(event->event), data);
193} 237}
194 238
195static void rds_ib_qp_event_handler(struct ib_event *event, void *data) 239static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
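rds_ib_event_str() is called here but defined elsewhere in the series; presumably it maps enum ib_event_type values to printable names. One plausible shape for such a helper (the actual table in the tree may differ):

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Sketch: name table indexed by the event enum; unknown codes fall through. */
static const char *const rds_ib_event_names[] = {
        [IB_EVENT_CQ_ERR]       = "CQ error",
        [IB_EVENT_QP_FATAL]     = "QP fatal",
        [IB_EVENT_COMM_EST]     = "communication established",
        [IB_EVENT_PORT_ACTIVE]  = "port active",
        [IB_EVENT_PORT_ERR]     = "port error",
};

static const char *rds_ib_event_str(enum ib_event_type ev)
{
        if (ev < ARRAY_SIZE(rds_ib_event_names) && rds_ib_event_names[ev])
                return rds_ib_event_names[ev];
        return "unknown";
}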
@@ -197,16 +241,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
197 struct rds_connection *conn = data; 241 struct rds_connection *conn = data;
198 struct rds_ib_connection *ic = conn->c_transport_data; 242 struct rds_ib_connection *ic = conn->c_transport_data;
199 243
200 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); 244 rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
245 rds_ib_event_str(event->event));
201 246
202 switch (event->event) { 247 switch (event->event) {
203 case IB_EVENT_COMM_EST: 248 case IB_EVENT_COMM_EST:
204 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 249 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
205 break; 250 break;
206 default: 251 default:
207 rdsdebug("Fatal QP Event %u " 252 rdsdebug("Fatal QP Event %u (%s) "
208 "- connection %pI4->%pI4, reconnecting\n", 253 "- connection %pI4->%pI4, reconnecting\n",
209 event->event, &conn->c_laddr, &conn->c_faddr); 254 event->event, rds_ib_event_str(event->event),
255 &conn->c_laddr, &conn->c_faddr);
210 rds_conn_drop(conn); 256 rds_conn_drop(conn);
211 break; 257 break;
212 } 258 }
@@ -224,18 +270,16 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
224 struct rds_ib_device *rds_ibdev; 270 struct rds_ib_device *rds_ibdev;
225 int ret; 271 int ret;
226 272
227 /* rds_ib_add_one creates a rds_ib_device object per IB device, 273 /*
228 * and allocates a protection domain, memory range and FMR pool 274 * It's normal to see a null device if an incoming connection races
229 * for each. If that fails for any reason, it will not register 275 * with device removal, so we don't print a warning.
230 * the rds_ibdev at all.
231 */ 276 */
232 rds_ibdev = ib_get_client_data(dev, &rds_ib_client); 277 rds_ibdev = rds_ib_get_client_data(dev);
233 if (rds_ibdev == NULL) { 278 if (!rds_ibdev)
234 if (printk_ratelimit())
235 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
236 dev->name);
237 return -EOPNOTSUPP; 279 return -EOPNOTSUPP;
238 } 280
281 /* add the conn now so that connection establishment has the dev */
282 rds_ib_add_conn(rds_ibdev, conn);
239 283
240 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) 284 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
241 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); 285 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
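rds_ib_get_client_data() and the rds_ib_dev_put() in the out path are used here without their bodies; the point is that the getter must take a reference so the device cannot be torn down while the QP is being set up. A sketch under that assumption (free_work is an assumed deferred-teardown hook on rds_ib_device):

#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>

struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
{
        struct rds_ib_device *rds_ibdev;

        rcu_read_lock();
        rds_ibdev = ib_get_client_data(device, &rds_ib_client);
        if (rds_ibdev)
                atomic_inc(&rds_ibdev->refcount);       /* pin across setup */
        rcu_read_unlock();
        return rds_ibdev;
}

void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
{
        /* the last reference schedules the real teardown off the hot path */
        if (atomic_dec_and_test(&rds_ibdev->refcount))
                queue_work(rds_wq, &rds_ibdev->free_work);
}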
@@ -306,7 +350,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
306 ic->i_send_ring.w_nr * 350 ic->i_send_ring.w_nr *
307 sizeof(struct rds_header), 351 sizeof(struct rds_header),
308 &ic->i_send_hdrs_dma, GFP_KERNEL); 352 &ic->i_send_hdrs_dma, GFP_KERNEL);
309 if (ic->i_send_hdrs == NULL) { 353 if (!ic->i_send_hdrs) {
310 ret = -ENOMEM; 354 ret = -ENOMEM;
311 rdsdebug("ib_dma_alloc_coherent send failed\n"); 355 rdsdebug("ib_dma_alloc_coherent send failed\n");
312 goto out; 356 goto out;
@@ -316,7 +360,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
316 ic->i_recv_ring.w_nr * 360 ic->i_recv_ring.w_nr *
317 sizeof(struct rds_header), 361 sizeof(struct rds_header),
318 &ic->i_recv_hdrs_dma, GFP_KERNEL); 362 &ic->i_recv_hdrs_dma, GFP_KERNEL);
319 if (ic->i_recv_hdrs == NULL) { 363 if (!ic->i_recv_hdrs) {
320 ret = -ENOMEM; 364 ret = -ENOMEM;
321 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 365 rdsdebug("ib_dma_alloc_coherent recv failed\n");
322 goto out; 366 goto out;
@@ -324,22 +368,24 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
324 368
325 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 369 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
326 &ic->i_ack_dma, GFP_KERNEL); 370 &ic->i_ack_dma, GFP_KERNEL);
327 if (ic->i_ack == NULL) { 371 if (!ic->i_ack) {
328 ret = -ENOMEM; 372 ret = -ENOMEM;
329 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 373 rdsdebug("ib_dma_alloc_coherent ack failed\n");
330 goto out; 374 goto out;
331 } 375 }
332 376
333 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 377 ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
334 if (ic->i_sends == NULL) { 378 ibdev_to_node(dev));
379 if (!ic->i_sends) {
335 ret = -ENOMEM; 380 ret = -ENOMEM;
336 rdsdebug("send allocation failed\n"); 381 rdsdebug("send allocation failed\n");
337 goto out; 382 goto out;
338 } 383 }
339 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 384 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
340 385
341 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); 386 ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
342 if (ic->i_recvs == NULL) { 387 ibdev_to_node(dev));
388 if (!ic->i_recvs) {
343 ret = -ENOMEM; 389 ret = -ENOMEM;
344 rdsdebug("recv allocation failed\n"); 390 rdsdebug("recv allocation failed\n");
345 goto out; 391 goto out;
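The switch from vmalloc() to vmalloc_node() above pins the send and recv work rings to the NUMA node closest to the HCA. ibdev_to_node() is used there without its definition; a plausible sketch derives the node from the device's underlying struct device:

#include <linux/vmalloc.h>
#include <rdma/ib_verbs.h>

/* Sketch: map an IB device to its NUMA node via the DMA struct device. */
static inline int ibdev_to_node(struct ib_device *ibdev)
{
        return dev_to_node(ibdev->dma_device);
}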
@@ -352,6 +398,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
352 ic->i_send_cq, ic->i_recv_cq); 398 ic->i_send_cq, ic->i_recv_cq);
353 399
354out: 400out:
401 rds_ib_dev_put(rds_ibdev);
355 return ret; 402 return ret;
356} 403}
357 404
@@ -409,7 +456,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
409 struct rds_ib_connection *ic = NULL; 456 struct rds_ib_connection *ic = NULL;
410 struct rdma_conn_param conn_param; 457 struct rdma_conn_param conn_param;
411 u32 version; 458 u32 version;
412 int err, destroy = 1; 459 int err = 1, destroy = 1;
413 460
414 /* Check whether the remote protocol version matches ours. */ 461 /* Check whether the remote protocol version matches ours. */
415 version = rds_ib_protocol_compatible(event); 462 version = rds_ib_protocol_compatible(event);
@@ -448,7 +495,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
448 /* Wait and see - our connect may still be succeeding */ 495 /* Wait and see - our connect may still be succeeding */
449 rds_ib_stats_inc(s_ib_connect_raced); 496 rds_ib_stats_inc(s_ib_connect_raced);
450 } 497 }
451 mutex_unlock(&conn->c_cm_lock);
452 goto out; 498 goto out;
453 } 499 }
454 500
@@ -475,24 +521,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
475 err = rds_ib_setup_qp(conn); 521 err = rds_ib_setup_qp(conn);
476 if (err) { 522 if (err) {
477 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); 523 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
478 mutex_unlock(&conn->c_cm_lock);
479 goto out; 524 goto out;
480 } 525 }
481 526
482 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); 527 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
528 event->param.conn.responder_resources,
529 event->param.conn.initiator_depth);
483 530
484 /* rdma_accept() calls rdma_reject() internally if it fails */ 531 /* rdma_accept() calls rdma_reject() internally if it fails */
485 err = rdma_accept(cm_id, &conn_param); 532 err = rdma_accept(cm_id, &conn_param);
486 mutex_unlock(&conn->c_cm_lock); 533 if (err)
487 if (err) {
488 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); 534 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
489 goto out;
490 }
491
492 return 0;
493 535
494out: 536out:
495 rdma_reject(cm_id, NULL, 0); 537 if (conn)
538 mutex_unlock(&conn->c_cm_lock);
539 if (err)
540 rdma_reject(cm_id, NULL, 0);
496 return destroy; 541 return destroy;
497} 542}
498 543
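The rewritten accept path funnels every outcome through the single out: label: the connection lock is dropped only if conn was actually resolved, and the reject is sent only while err is still set. The idiom, reduced to a skeleton with hypothetical lookup_conn()/do_accept() helpers:

#include <linux/mutex.h>
#include <rdma/rdma_cm.h>

static int handle_connect_skeleton(struct rdma_cm_id *cm_id)
{
        struct rds_connection *conn = NULL;
        int err = 1;                    /* reject unless the accept succeeds */

        conn = lookup_conn(cm_id);      /* hypothetical */
        if (!conn)
                goto out;               /* no lock held, still reject */

        mutex_lock(&conn->c_cm_lock);
        err = do_accept(cm_id, conn);   /* hypothetical, 0 on success */
out:
        if (conn)
                mutex_unlock(&conn->c_cm_lock);
        if (err)
                rdma_reject(cm_id, NULL, 0);
        return 1;                       /* destroy, as above */
}

Initializing err to 1 is what lets the early version-mismatch and connect-race exits reach the rdma_reject() without any extra bookkeeping.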
@@ -516,8 +561,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
516 goto out; 561 goto out;
517 } 562 }
518 563
519 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); 564 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
520 565 UINT_MAX, UINT_MAX);
521 ret = rdma_connect(cm_id, &conn_param); 566 ret = rdma_connect(cm_id, &conn_param);
522 if (ret) 567 if (ret)
523 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); 568 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
@@ -601,9 +646,19 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
601 ic->i_cm_id, err); 646 ic->i_cm_id, err);
602 } 647 }
603 648
649 /*
650 * We want to wait for tx and rx completion to finish
651 * before we tear down the connection, but we have to be
652 * careful not to get stuck waiting on a send ring that
653 * only has unsignaled sends in it. We've shut down new
654 * sends before getting here, so by waiting for signaled
655 * sends to complete we ensure that there will be no
656 * more tx processing.
657 */
604 wait_event(rds_ib_ring_empty_wait, 658 wait_event(rds_ib_ring_empty_wait,
605 rds_ib_ring_empty(&ic->i_send_ring) && 659 rds_ib_ring_empty(&ic->i_recv_ring) &&
606 rds_ib_ring_empty(&ic->i_recv_ring)); 660 (atomic_read(&ic->i_signaled_sends) == 0));
661 tasklet_kill(&ic->i_recv_tasklet);
607 662
608 if (ic->i_send_hdrs) 663 if (ic->i_send_hdrs)
609 ib_dma_free_coherent(dev, 664 ib_dma_free_coherent(dev,
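The new wait condition is the point of i_signaled_sends: unsignaled sends never generate completions, so waiting for the send ring itself to drain could block forever. Counting only signaled work requests gives a condition that provably reaches zero once new sends are shut off. A sketch of the assumed accounting on the send side (not part of this hunk):

#include <linux/atomic.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>

static void note_posted_send(struct rds_ib_connection *ic,
                             struct ib_send_wr *wr)
{
        if (wr->send_flags & IB_SEND_SIGNALED)
                atomic_inc(&ic->i_signaled_sends);
}

static void note_send_completion(struct rds_ib_connection *ic)
{
        /* last outstanding signaled send kicks the shutdown waiter */
        if (atomic_dec_and_test(&ic->i_signaled_sends))
                wake_up(&rds_ib_ring_empty_wait);
}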
@@ -654,9 +709,12 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
654 BUG_ON(ic->rds_ibdev); 709 BUG_ON(ic->rds_ibdev);
655 710
656 /* Clear pending transmit */ 711 /* Clear pending transmit */
657 if (ic->i_rm) { 712 if (ic->i_data_op) {
658 rds_message_put(ic->i_rm); 713 struct rds_message *rm;
659 ic->i_rm = NULL; 714
715 rm = container_of(ic->i_data_op, struct rds_message, data);
716 rds_message_put(rm);
717 ic->i_data_op = NULL;
660 } 718 }
661 719
662 /* Clear the ACK state */ 720 /* Clear the ACK state */
@@ -690,12 +748,19 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
690{ 748{
691 struct rds_ib_connection *ic; 749 struct rds_ib_connection *ic;
692 unsigned long flags; 750 unsigned long flags;
751 int ret;
693 752
694 /* XXX too lazy? */ 753 /* XXX too lazy? */
695 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); 754 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
696 if (ic == NULL) 755 if (!ic)
697 return -ENOMEM; 756 return -ENOMEM;
698 757
758 ret = rds_ib_recv_alloc_caches(ic);
759 if (ret) {
760 kfree(ic);
761 return ret;
762 }
763
699 INIT_LIST_HEAD(&ic->ib_node); 764 INIT_LIST_HEAD(&ic->ib_node);
700 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, 765 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
701 (unsigned long) ic); 766 (unsigned long) ic);
@@ -703,6 +768,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
703#ifndef KERNEL_HAS_ATOMIC64 768#ifndef KERNEL_HAS_ATOMIC64
704 spin_lock_init(&ic->i_ack_lock); 769 spin_lock_init(&ic->i_ack_lock);
705#endif 770#endif
771 atomic_set(&ic->i_signaled_sends, 0);
706 772
707 /* 773 /*
708 * rds_ib_conn_shutdown() waits for these to be emptied so they 774 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -744,6 +810,8 @@ void rds_ib_conn_free(void *arg)
744 list_del(&ic->ib_node); 810 list_del(&ic->ib_node);
745 spin_unlock_irq(lock_ptr); 811 spin_unlock_irq(lock_ptr);
746 812
813 rds_ib_recv_free_caches(ic);
814
747 kfree(ic); 815 kfree(ic);
748} 816}
749 817
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a54cd63f9e35..18a833c450c8 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -32,11 +32,16 @@
32 */ 32 */
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/rculist.h>
35 36
36#include "rds.h" 37#include "rds.h"
37#include "rdma.h"
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h"
39 40
41static struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0
40 45
41/* 46/*
42 * This is stored as mr->r_trans_private. 47 * This is stored as mr->r_trans_private.
@@ -45,7 +50,11 @@ struct rds_ib_mr {
45 struct rds_ib_device *device; 50 struct rds_ib_device *device;
46 struct rds_ib_mr_pool *pool; 51 struct rds_ib_mr_pool *pool;
47 struct ib_fmr *fmr; 52 struct ib_fmr *fmr;
48 struct list_head list; 53
54 struct xlist_head xlist;
55
56 /* unmap_list is for freeing */
57 struct list_head unmap_list;
49 unsigned int remap_count; 58 unsigned int remap_count;
50 59
51 struct scatterlist *sg; 60 struct scatterlist *sg;
@@ -59,14 +68,16 @@ struct rds_ib_mr {
59 */ 68 */
60struct rds_ib_mr_pool { 69struct rds_ib_mr_pool {
61 struct mutex flush_lock; /* serialize fmr invalidate */ 70 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */ 71 struct delayed_work flush_worker; /* flush worker */
63 72
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */ 73 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # dirty of MRs */ 74 atomic_t dirty_count; /* # dirty of MRs */
67 struct list_head drop_list; /* MRs that have reached their max_maps limit */ 75
68 struct list_head free_list; /* unused MRs */ 76 struct xlist_head drop_list; /* MRs that have reached their max_maps limit */
69 struct list_head clean_list; /* unused & unmapped MRs */ 77 struct xlist_head free_list; /* unused MRs */
78 struct xlist_head clean_list; /* global unused & unmapped MRs */
79 wait_queue_head_t flush_wait;
80
70 atomic_t free_pinned; /* memory pinned by free MRs */ 81 atomic_t free_pinned; /* memory pinned by free MRs */
71 unsigned long max_items; 82 unsigned long max_items;
72 unsigned long max_items_soft; 83 unsigned long max_items_soft;
@@ -74,7 +85,7 @@ struct rds_ib_mr_pool {
74 struct ib_fmr_attr fmr_attr; 85 struct ib_fmr_attr fmr_attr;
75}; 86};
76 87
77static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all); 88static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
78static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); 89static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
79static void rds_ib_mr_pool_flush_worker(struct work_struct *work); 90static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
80 91
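The xlist_head type comes from the new xlist.h, which is not part of this diff. From its use below it is a lock-free LIFO: push and pop are cmpxchg loops, and a splice takes the whole chain with one xchg. A minimal sketch of the presumed primitives:

#include <linux/atomic.h>

struct xlist_head {
        struct xlist_head *next;
};

/* push a pre-linked chain [new_head..new_tail] onto the front */
static inline void xlist_add(struct xlist_head *new_head,
                             struct xlist_head *new_tail,
                             struct xlist_head *head)
{
        struct xlist_head *cur, *check;

        while (1) {
                cur = head->next;
                new_tail->next = cur;
                check = cmpxchg(&head->next, cur, new_head);
                if (check == cur)
                        break;
        }
}

static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
{
        struct xlist_head *cur, *check, *next;

        while (1) {
                cur = head->next;
                if (!cur)
                        return NULL;
                next = cur->next;       /* racy if cur is freed/reused */
                check = cmpxchg(&head->next, cur, next);
                if (check == cur)
                        return cur;
        }
}

The racy cur->next read is why the pool code below marks a per-CPU busy bit around xlist_del_head() and calls wait_clean_list_grace() before recycling nodes back onto clean_list.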
@@ -83,16 +94,17 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
83 struct rds_ib_device *rds_ibdev; 94 struct rds_ib_device *rds_ibdev;
84 struct rds_ib_ipaddr *i_ipaddr; 95 struct rds_ib_ipaddr *i_ipaddr;
85 96
86 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 97 rcu_read_lock();
87 spin_lock_irq(&rds_ibdev->spinlock); 98 list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
88 list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 99 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
89 if (i_ipaddr->ipaddr == ipaddr) { 100 if (i_ipaddr->ipaddr == ipaddr) {
90 spin_unlock_irq(&rds_ibdev->spinlock); 101 atomic_inc(&rds_ibdev->refcount);
102 rcu_read_unlock();
91 return rds_ibdev; 103 return rds_ibdev;
92 } 104 }
93 } 105 }
94 spin_unlock_irq(&rds_ibdev->spinlock);
95 } 106 }
107 rcu_read_unlock();
96 108
97 return NULL; 109 return NULL;
98} 110}
@@ -108,7 +120,7 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
108 i_ipaddr->ipaddr = ipaddr; 120 i_ipaddr->ipaddr = ipaddr;
109 121
110 spin_lock_irq(&rds_ibdev->spinlock); 122 spin_lock_irq(&rds_ibdev->spinlock);
111 list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list); 123 list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
112 spin_unlock_irq(&rds_ibdev->spinlock); 124 spin_unlock_irq(&rds_ibdev->spinlock);
113 125
114 return 0; 126 return 0;
@@ -116,17 +128,24 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
116 128
117static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 129static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
118{ 130{
119 struct rds_ib_ipaddr *i_ipaddr, *next; 131 struct rds_ib_ipaddr *i_ipaddr;
132 struct rds_ib_ipaddr *to_free = NULL;
133
120 134
121 spin_lock_irq(&rds_ibdev->spinlock); 135 spin_lock_irq(&rds_ibdev->spinlock);
122 list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) { 136 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
123 if (i_ipaddr->ipaddr == ipaddr) { 137 if (i_ipaddr->ipaddr == ipaddr) {
124 list_del(&i_ipaddr->list); 138 list_del_rcu(&i_ipaddr->list);
125 kfree(i_ipaddr); 139 to_free = i_ipaddr;
126 break; 140 break;
127 } 141 }
128 } 142 }
129 spin_unlock_irq(&rds_ibdev->spinlock); 143 spin_unlock_irq(&rds_ibdev->spinlock);
144
145 if (to_free) {
146 synchronize_rcu();
147 kfree(to_free);
148 }
130} 149}
131 150
132int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 151int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
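Taken together, the three ipaddr helpers above now form the standard RCU list pattern: lockless readers, writers serialized by the spinlock, and removal that defers kfree() past synchronize_rcu(). Condensed to a skeleton (addr_entry stands in for rds_ib_ipaddr):

#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct addr_entry {
        struct list_head list;
        __be32 ipaddr;
};

static bool addr_present(struct list_head *head, __be32 ipaddr)
{
        struct addr_entry *e;
        bool found = false;

        rcu_read_lock();
        list_for_each_entry_rcu(e, head, list) {
                if (e->ipaddr == ipaddr) {
                        found = true;
                        break;
                }
        }
        rcu_read_unlock();
        return found;
}

static void addr_remove(spinlock_t *lock, struct addr_entry *e)
{
        spin_lock_irq(lock);
        list_del_rcu(&e->list);
        spin_unlock_irq(lock);
        synchronize_rcu();      /* wait out readers still holding e */
        kfree(e);
}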
@@ -134,8 +153,10 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
134 struct rds_ib_device *rds_ibdev_old; 153 struct rds_ib_device *rds_ibdev_old;
135 154
136 rds_ibdev_old = rds_ib_get_device(ipaddr); 155 rds_ibdev_old = rds_ib_get_device(ipaddr);
137 if (rds_ibdev_old) 156 if (rds_ibdev_old) {
138 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); 157 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
158 rds_ib_dev_put(rds_ibdev_old);
159 }
139 160
140 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 161 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
141} 162}
@@ -150,12 +171,13 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
150 BUG_ON(list_empty(&ic->ib_node)); 171 BUG_ON(list_empty(&ic->ib_node));
151 list_del(&ic->ib_node); 172 list_del(&ic->ib_node);
152 173
153 spin_lock_irq(&rds_ibdev->spinlock); 174 spin_lock(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 175 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 176 spin_unlock(&rds_ibdev->spinlock);
156 spin_unlock_irq(&ib_nodev_conns_lock); 177 spin_unlock_irq(&ib_nodev_conns_lock);
157 178
158 ic->rds_ibdev = rds_ibdev; 179 ic->rds_ibdev = rds_ibdev;
180 atomic_inc(&rds_ibdev->refcount);
159} 181}
160 182
161void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 183void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
@@ -175,18 +197,18 @@ void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *
175 spin_unlock(&ib_nodev_conns_lock); 197 spin_unlock(&ib_nodev_conns_lock);
176 198
177 ic->rds_ibdev = NULL; 199 ic->rds_ibdev = NULL;
200 rds_ib_dev_put(rds_ibdev);
178} 201}
179 202
180void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) 203void rds_ib_destroy_nodev_conns(void)
181{ 204{
182 struct rds_ib_connection *ic, *_ic; 205 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 206 LIST_HEAD(tmp_list);
184 207
185 /* avoid calling conn_destroy with irqs off */ 208 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(list_lock); 209 spin_lock_irq(&ib_nodev_conns_lock);
187 list_splice(list, &tmp_list); 210 list_splice(&ib_nodev_conns, &tmp_list);
188 INIT_LIST_HEAD(list); 211 spin_unlock_irq(&ib_nodev_conns_lock);
189 spin_unlock_irq(list_lock);
190 212
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) 213 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
192 rds_conn_destroy(ic->conn); 214 rds_conn_destroy(ic->conn);
@@ -200,12 +222,12 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
200 if (!pool) 222 if (!pool)
201 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
202 224
203 INIT_LIST_HEAD(&pool->free_list); 225 INIT_XLIST_HEAD(&pool->free_list);
204 INIT_LIST_HEAD(&pool->drop_list); 226 INIT_XLIST_HEAD(&pool->drop_list);
205 INIT_LIST_HEAD(&pool->clean_list); 227 INIT_XLIST_HEAD(&pool->clean_list);
206 mutex_init(&pool->flush_lock); 228 mutex_init(&pool->flush_lock);
207 spin_lock_init(&pool->list_lock); 229 init_waitqueue_head(&pool->flush_wait);
208 INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 230 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
209 231
210 pool->fmr_attr.max_pages = fmr_message_size; 232 pool->fmr_attr.max_pages = fmr_message_size;
211 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; 233 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
@@ -233,34 +255,60 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
233 255
234void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 256void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
235{ 257{
236 flush_workqueue(rds_wq); 258 cancel_delayed_work_sync(&pool->flush_worker);
237 rds_ib_flush_mr_pool(pool, 1); 259 rds_ib_flush_mr_pool(pool, 1, NULL);
238 WARN_ON(atomic_read(&pool->item_count)); 260 WARN_ON(atomic_read(&pool->item_count));
239 WARN_ON(atomic_read(&pool->free_pinned)); 261 WARN_ON(atomic_read(&pool->free_pinned));
240 kfree(pool); 262 kfree(pool);
241} 263}
242 264
265static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
266 struct rds_ib_mr **ibmr_ret)
267{
268 struct xlist_head *ibmr_xl;
269 ibmr_xl = xlist_del_head_fast(xl);
270 *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
271}
272
243static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) 273static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
244{ 274{
245 struct rds_ib_mr *ibmr = NULL; 275 struct rds_ib_mr *ibmr = NULL;
246 unsigned long flags; 276 struct xlist_head *ret;
277 unsigned long *flag;
247 278
248 spin_lock_irqsave(&pool->list_lock, flags); 279 preempt_disable();
249 if (!list_empty(&pool->clean_list)) { 280 flag = &__get_cpu_var(clean_list_grace);
250 ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list); 281 set_bit(CLEAN_LIST_BUSY_BIT, flag);
251 list_del_init(&ibmr->list); 282 ret = xlist_del_head(&pool->clean_list);
252 } 283 if (ret)
253 spin_unlock_irqrestore(&pool->list_lock, flags); 284 ibmr = list_entry(ret, struct rds_ib_mr, xlist);
254 285
286 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
287 preempt_enable();
255 return ibmr; 288 return ibmr;
256} 289}
257 290
291static inline void wait_clean_list_grace(void)
292{
293 int cpu;
294 unsigned long *flag;
295
296 for_each_online_cpu(cpu) {
297 flag = &per_cpu(clean_list_grace, cpu);
298 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
299 cpu_relax();
300 }
301}
302
258static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) 303static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
259{ 304{
260 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 305 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
261 struct rds_ib_mr *ibmr = NULL; 306 struct rds_ib_mr *ibmr = NULL;
262 int err = 0, iter = 0; 307 int err = 0, iter = 0;
263 308
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
311
264 while (1) { 312 while (1) {
265 ibmr = rds_ib_reuse_fmr(pool); 313 ibmr = rds_ib_reuse_fmr(pool);
266 if (ibmr) 314 if (ibmr)
@@ -287,19 +335,24 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
287 335
288 /* We do have some empty MRs. Flush them out. */ 336 /* We do have some empty MRs. Flush them out. */
289 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); 337 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
290 rds_ib_flush_mr_pool(pool, 0); 338 rds_ib_flush_mr_pool(pool, 0, &ibmr);
339 if (ibmr)
340 return ibmr;
291 } 341 }
292 342
293 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); 343 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
294 if (!ibmr) { 344 if (!ibmr) {
295 err = -ENOMEM; 345 err = -ENOMEM;
296 goto out_no_cigar; 346 goto out_no_cigar;
297 } 347 }
298 348
349 memset(ibmr, 0, sizeof(*ibmr));
350
299 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, 351 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
300 (IB_ACCESS_LOCAL_WRITE | 352 (IB_ACCESS_LOCAL_WRITE |
301 IB_ACCESS_REMOTE_READ | 353 IB_ACCESS_REMOTE_READ |
302 IB_ACCESS_REMOTE_WRITE), 354 IB_ACCESS_REMOTE_WRITE|
355 IB_ACCESS_REMOTE_ATOMIC),
303 &pool->fmr_attr); 356 &pool->fmr_attr);
304 if (IS_ERR(ibmr->fmr)) { 357 if (IS_ERR(ibmr->fmr)) {
305 err = PTR_ERR(ibmr->fmr); 358 err = PTR_ERR(ibmr->fmr);
@@ -367,7 +420,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
367 if (page_cnt > fmr_message_size) 420 if (page_cnt > fmr_message_size)
368 return -EINVAL; 421 return -EINVAL;
369 422
370 dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC); 423 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
424 rdsibdev_to_node(rds_ibdev));
371 if (!dma_pages) 425 if (!dma_pages)
372 return -ENOMEM; 426 return -ENOMEM;
373 427
@@ -441,7 +495,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
441 495
442 /* FIXME we need a way to tell a r/w MR 496 /* FIXME we need a way to tell a r/w MR
443 * from a r/o MR */ 497 * from a r/o MR */
444 BUG_ON(in_interrupt()); 498 BUG_ON(irqs_disabled());
445 set_page_dirty(page); 499 set_page_dirty(page);
446 put_page(page); 500 put_page(page);
447 } 501 }
@@ -477,33 +531,109 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
477} 531}
478 532
479/* 533/*
534 * given an xlist of mrs, put them all into the list_head for more processing
535 */
536static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
537{
538 struct rds_ib_mr *ibmr;
539 struct xlist_head splice;
540 struct xlist_head *cur;
541 struct xlist_head *next;
542
543 splice.next = NULL;
544 xlist_splice(xlist, &splice);
545 cur = splice.next;
546 while (cur) {
547 next = cur->next;
548 ibmr = list_entry(cur, struct rds_ib_mr, xlist);
549 list_add_tail(&ibmr->unmap_list, list);
550 cur = next;
551 }
552}
553
554/*
555 * this takes a list head of mrs and turns it into an xlist of clusters.
556 * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
557 * reuse.
558 */
559static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
560 struct list_head *list, struct xlist_head *xlist,
561 struct xlist_head **tail_ret)
562{
563 struct rds_ib_mr *ibmr;
564 struct xlist_head *cur_mr = xlist;
565 struct xlist_head *tail_mr = NULL;
566
567 list_for_each_entry(ibmr, list, unmap_list) {
568 tail_mr = &ibmr->xlist;
569 tail_mr->next = NULL;
570 cur_mr->next = tail_mr;
571 cur_mr = tail_mr;
572 }
573 *tail_ret = tail_mr;
574}
575
576/*
480 * Flush our pool of MRs. 577 * Flush our pool of MRs.
481 * At a minimum, all currently unused MRs are unmapped. 578 * At a minimum, all currently unused MRs are unmapped.
482 * If the number of MRs allocated exceeds the limit, we also try 579 * If the number of MRs allocated exceeds the limit, we also try
483 * to free as many MRs as needed to get back to this limit. 580 * to free as many MRs as needed to get back to this limit.
484 */ 581 */
485static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all) 582static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
583 int free_all, struct rds_ib_mr **ibmr_ret)
486{ 584{
487 struct rds_ib_mr *ibmr, *next; 585 struct rds_ib_mr *ibmr, *next;
586 struct xlist_head clean_xlist;
587 struct xlist_head *clean_tail;
488 LIST_HEAD(unmap_list); 588 LIST_HEAD(unmap_list);
489 LIST_HEAD(fmr_list); 589 LIST_HEAD(fmr_list);
490 unsigned long unpinned = 0; 590 unsigned long unpinned = 0;
491 unsigned long flags;
492 unsigned int nfreed = 0, ncleaned = 0, free_goal; 591 unsigned int nfreed = 0, ncleaned = 0, free_goal;
493 int ret = 0; 592 int ret = 0;
494 593
495 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); 594 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
496 595
497 mutex_lock(&pool->flush_lock); 596 if (ibmr_ret) {
597 DEFINE_WAIT(wait);
598 while (!mutex_trylock(&pool->flush_lock)) {
599 ibmr = rds_ib_reuse_fmr(pool);
600 if (ibmr) {
601 *ibmr_ret = ibmr;
602 finish_wait(&pool->flush_wait, &wait);
603 goto out_nolock;
604 }
605
606 prepare_to_wait(&pool->flush_wait, &wait,
607 TASK_UNINTERRUPTIBLE);
608 if (xlist_empty(&pool->clean_list))
609 schedule();
610
611 ibmr = rds_ib_reuse_fmr(pool);
612 if (ibmr) {
613 *ibmr_ret = ibmr;
614 finish_wait(&pool->flush_wait, &wait);
615 goto out_nolock;
616 }
617 }
618 finish_wait(&pool->flush_wait, &wait);
619 } else
620 mutex_lock(&pool->flush_lock);
621
622 if (ibmr_ret) {
623 ibmr = rds_ib_reuse_fmr(pool);
624 if (ibmr) {
625 *ibmr_ret = ibmr;
626 goto out;
627 }
628 }
498 629
499 spin_lock_irqsave(&pool->list_lock, flags);
500 /* Get the list of all MRs to be dropped. Ordering matters - 630 /* Get the list of all MRs to be dropped. Ordering matters -
501 * we want to put drop_list ahead of free_list. */ 631 * we want to put drop_list ahead of free_list.
502 list_splice_init(&pool->free_list, &unmap_list); 632 */
503 list_splice_init(&pool->drop_list, &unmap_list); 633 xlist_append_to_list(&pool->drop_list, &unmap_list);
634 xlist_append_to_list(&pool->free_list, &unmap_list);
504 if (free_all) 635 if (free_all)
505 list_splice_init(&pool->clean_list, &unmap_list); 636 xlist_append_to_list(&pool->clean_list, &unmap_list);
506 spin_unlock_irqrestore(&pool->list_lock, flags);
507 637
508 free_goal = rds_ib_flush_goal(pool, free_all); 638 free_goal = rds_ib_flush_goal(pool, free_all);
509 639
@@ -511,19 +641,20 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
511 goto out; 641 goto out;
512 642
513 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 643 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
514 list_for_each_entry(ibmr, &unmap_list, list) 644 list_for_each_entry(ibmr, &unmap_list, unmap_list)
515 list_add(&ibmr->fmr->list, &fmr_list); 645 list_add(&ibmr->fmr->list, &fmr_list);
646
516 ret = ib_unmap_fmr(&fmr_list); 647 ret = ib_unmap_fmr(&fmr_list);
517 if (ret) 648 if (ret)
518 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret); 649 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
519 650
520 /* Now we can destroy the DMA mapping and unpin any pages */ 651 /* Now we can destroy the DMA mapping and unpin any pages */
521 list_for_each_entry_safe(ibmr, next, &unmap_list, list) { 652 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
522 unpinned += ibmr->sg_len; 653 unpinned += ibmr->sg_len;
523 __rds_ib_teardown_mr(ibmr); 654 __rds_ib_teardown_mr(ibmr);
524 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { 655 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
525 rds_ib_stats_inc(s_ib_rdma_mr_free); 656 rds_ib_stats_inc(s_ib_rdma_mr_free);
526 list_del(&ibmr->list); 657 list_del(&ibmr->unmap_list);
527 ib_dealloc_fmr(ibmr->fmr); 658 ib_dealloc_fmr(ibmr->fmr);
528 kfree(ibmr); 659 kfree(ibmr);
529 nfreed++; 660 nfreed++;
@@ -531,9 +662,27 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
531 ncleaned++; 662 ncleaned++;
532 } 663 }
533 664
534 spin_lock_irqsave(&pool->list_lock, flags); 665 if (!list_empty(&unmap_list)) {
535 list_splice(&unmap_list, &pool->clean_list); 666 /* we have to make sure that none of the things we're about
536 spin_unlock_irqrestore(&pool->list_lock, flags); 667 * to put on the clean list would race with other cpus trying
668 * to pull items off. The xlist would explode if we managed to
669 * remove something from the clean list and then add it back again
670 * while another CPU was spinning on that same item in xlist_del_head.
671 *
672 * This is pretty unlikely, but just in case, wait for an xlist grace period
673 * here before adding anything back into the clean list.
674 */
675 wait_clean_list_grace();
676
677 list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
678 if (ibmr_ret)
679 refill_local(pool, &clean_xlist, ibmr_ret);
680
681 /* refill_local may have emptied our list */
682 if (!xlist_empty(&clean_xlist))
683 xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
684
685 }
537 686
538 atomic_sub(unpinned, &pool->free_pinned); 687 atomic_sub(unpinned, &pool->free_pinned);
539 atomic_sub(ncleaned, &pool->dirty_count); 688 atomic_sub(ncleaned, &pool->dirty_count);
@@ -541,14 +690,35 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
541 690
542out: 691out:
543 mutex_unlock(&pool->flush_lock); 692 mutex_unlock(&pool->flush_lock);
693 if (waitqueue_active(&pool->flush_wait))
694 wake_up(&pool->flush_wait);
695out_nolock:
544 return ret; 696 return ret;
545} 697}
546 698
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
547static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 717static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
548{ 718{
549 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker); 719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
550 720
551 rds_ib_flush_mr_pool(pool, 0); 721 rds_ib_flush_mr_pool(pool, 0, NULL);
552} 722}
553 723
554void rds_ib_free_mr(void *trans_private, int invalidate) 724void rds_ib_free_mr(void *trans_private, int invalidate)
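rds_ib_fmr_init()/rds_ib_fmr_exit() give pool flushing a dedicated workqueue instead of sharing rds_wq, and the work item becomes delayed work so bursts of frees coalesce into one flush. The presumed hookup at module init (the call sites and ordering here are an assumption):

#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>

static int __init rds_ib_init_sketch(void)
{
        int ret;

        ret = rds_ib_fmr_init();        /* create_workqueue("rds_fmr_flushd") */
        if (ret)
                return ret;

        ret = ib_register_client(&rds_ib_client);
        if (ret)
                rds_ib_fmr_exit();      /* destroy_workqueue() on failure */
        return ret;
}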
@@ -556,47 +726,49 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
556 struct rds_ib_mr *ibmr = trans_private; 726 struct rds_ib_mr *ibmr = trans_private;
557 struct rds_ib_device *rds_ibdev = ibmr->device; 727 struct rds_ib_device *rds_ibdev = ibmr->device;
558 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 728 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
559 unsigned long flags;
560 729
561 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 730 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
562 731
563 /* Return it to the pool's free list */ 732 /* Return it to the pool's free list */
564 spin_lock_irqsave(&pool->list_lock, flags);
565 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 733 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
566 list_add(&ibmr->list, &pool->drop_list); 734 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
567 else 735 else
568 list_add(&ibmr->list, &pool->free_list); 736 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
569 737
570 atomic_add(ibmr->sg_len, &pool->free_pinned); 738 atomic_add(ibmr->sg_len, &pool->free_pinned);
571 atomic_inc(&pool->dirty_count); 739 atomic_inc(&pool->dirty_count);
572 spin_unlock_irqrestore(&pool->list_lock, flags);
573 740
574 /* If we've pinned too many pages, request a flush */ 741 /* If we've pinned too many pages, request a flush */
575 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
576 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 743 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
577 queue_work(rds_wq, &pool->flush_worker); 744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
578 745
579 if (invalidate) { 746 if (invalidate) {
580 if (likely(!in_interrupt())) { 747 if (likely(!in_interrupt())) {
581 rds_ib_flush_mr_pool(pool, 0); 748 rds_ib_flush_mr_pool(pool, 0, NULL);
582 } else { 749 } else {
583 /* We get here if the user created a MR marked 750 /* We get here if the user created a MR marked
584 * as use_once and invalidate at the same time. */ 751 * as use_once and invalidate at the same time. */
585 queue_work(rds_wq, &pool->flush_worker); 752 queue_delayed_work(rds_ib_fmr_wq,
753 &pool->flush_worker, 10);
586 } 754 }
587 } 755 }
756
757 rds_ib_dev_put(rds_ibdev);
588} 758}
589 759
590void rds_ib_flush_mrs(void) 760void rds_ib_flush_mrs(void)
591{ 761{
592 struct rds_ib_device *rds_ibdev; 762 struct rds_ib_device *rds_ibdev;
593 763
764 down_read(&rds_ib_devices_lock);
594 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 765 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
595 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 766 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
596 767
597 if (pool) 768 if (pool)
598 rds_ib_flush_mr_pool(pool, 0); 769 rds_ib_flush_mr_pool(pool, 0, NULL);
599 } 770 }
771 up_read(&rds_ib_devices_lock);
600} 772}
601 773
602void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 774void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -628,6 +800,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
628 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); 800 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
629 801
630 ibmr->device = rds_ibdev; 802 ibmr->device = rds_ibdev;
803 rds_ibdev = NULL;
631 804
632 out: 805 out:
633 if (ret) { 806 if (ret) {
@@ -635,5 +808,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
635 rds_ib_free_mr(ibmr, 0); 808 rds_ib_free_mr(ibmr, 0);
636 ibmr = ERR_PTR(ret); 809 ibmr = ERR_PTR(ret);
637 } 810 }
811 if (rds_ibdev)
812 rds_ib_dev_put(rds_ibdev);
638 return ibmr; 813 return ibmr;
639} 814}
815
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index c74e9904a6b2..e29e0ca32f74 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -43,42 +43,6 @@ static struct kmem_cache *rds_ib_incoming_slab;
43static struct kmem_cache *rds_ib_frag_slab; 43static struct kmem_cache *rds_ib_frag_slab;
44static atomic_t rds_ib_allocation = ATOMIC_INIT(0); 44static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
45 45
46static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
47{
48 rdsdebug("frag %p page %p\n", frag, frag->f_page);
49 __free_page(frag->f_page);
50 frag->f_page = NULL;
51}
52
53static void rds_ib_frag_free(struct rds_page_frag *frag)
54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL);
57 kmem_cache_free(rds_ib_frag_slab, frag);
58}
59
60/*
61 * We map a page at a time. Its fragments are posted in order. This
62 * is called in fragment order as the fragments get send completion events.
63 * Only the last frag in the page performs the unmapping.
64 *
65 * It's OK for ring cleanup to call this in whatever order it likes because
66 * DMA is not in flight and so we can unmap while other ring entries still
67 * hold page references in their frags.
68 */
69static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
70 struct rds_ib_recv_work *recv)
71{
72 struct rds_page_frag *frag = recv->r_frag;
73
74 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
75 if (frag->f_mapped)
76 ib_dma_unmap_page(ic->i_cm_id->device,
77 frag->f_mapped,
78 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
79 frag->f_mapped = 0;
80}
81
82void rds_ib_recv_init_ring(struct rds_ib_connection *ic) 46void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
83{ 47{
84 struct rds_ib_recv_work *recv; 48 struct rds_ib_recv_work *recv;
@@ -95,16 +59,161 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
95 recv->r_wr.sg_list = recv->r_sge; 59 recv->r_wr.sg_list = recv->r_sge;
96 recv->r_wr.num_sge = RDS_IB_RECV_SGE; 60 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
97 61
98 sge = rds_ib_data_sge(ic, recv->r_sge); 62 sge = &recv->r_sge[0];
63 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
64 sge->length = sizeof(struct rds_header);
65 sge->lkey = ic->i_mr->lkey;
66
67 sge = &recv->r_sge[1];
99 sge->addr = 0; 68 sge->addr = 0;
100 sge->length = RDS_FRAG_SIZE; 69 sge->length = RDS_FRAG_SIZE;
101 sge->lkey = ic->i_mr->lkey; 70 sge->lkey = ic->i_mr->lkey;
71 }
72}
102 73
103 sge = rds_ib_header_sge(ic, recv->r_sge); 74/*
104 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); 75 * The entire 'from' list, including the from element itself, is put on
105 sge->length = sizeof(struct rds_header); 76 * to the tail of the 'to' list.
106 sge->lkey = ic->i_mr->lkey; 77 */
78static void list_splice_entire_tail(struct list_head *from,
79 struct list_head *to)
80{
81 struct list_head *from_last = from->prev;
82
83 list_splice_tail(from_last, to);
84 list_add_tail(from_last, to);
85}
86
87static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
88{
89 struct list_head *tmp;
90
91 tmp = xchg(&cache->xfer, NULL);
92 if (tmp) {
93 if (cache->ready)
94 list_splice_entire_tail(tmp, cache->ready);
95 else
96 cache->ready = tmp;
97 }
98}
99
100static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
101{
102 struct rds_ib_cache_head *head;
103 int cpu;
104
105 cache->percpu = alloc_percpu(struct rds_ib_cache_head);
106 if (!cache->percpu)
107 return -ENOMEM;
108
109 for_each_possible_cpu(cpu) {
110 head = per_cpu_ptr(cache->percpu, cpu);
111 head->first = NULL;
112 head->count = 0;
113 }
114 cache->xfer = NULL;
115 cache->ready = NULL;
116
117 return 0;
118}
119
120int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
121{
122 int ret;
123
124 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
125 if (!ret) {
126 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
127 if (ret)
128 free_percpu(ic->i_cache_incs.percpu);
107 } 129 }
130
131 return ret;
132}
133
134static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
135 struct list_head *caller_list)
136{
137 struct rds_ib_cache_head *head;
138 int cpu;
139
140 for_each_possible_cpu(cpu) {
141 head = per_cpu_ptr(cache->percpu, cpu);
142 if (head->first) {
143 list_splice_entire_tail(head->first, caller_list);
144 head->first = NULL;
145 }
146 }
147
148 if (cache->ready) {
149 list_splice_entire_tail(cache->ready, caller_list);
150 cache->ready = NULL;
151 }
152}
153
154void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
155{
156 struct rds_ib_incoming *inc;
157 struct rds_ib_incoming *inc_tmp;
158 struct rds_page_frag *frag;
159 struct rds_page_frag *frag_tmp;
160 LIST_HEAD(list);
161
162 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
163 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
164 free_percpu(ic->i_cache_incs.percpu);
165
166 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
167 list_del(&inc->ii_cache_entry);
168 WARN_ON(!list_empty(&inc->ii_frags));
169 kmem_cache_free(rds_ib_incoming_slab, inc);
170 }
171
172 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
173 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
174 free_percpu(ic->i_cache_frags.percpu);
175
176 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
177 list_del(&frag->f_cache_entry);
178 WARN_ON(!list_empty(&frag->f_item));
179 kmem_cache_free(rds_ib_frag_slab, frag);
180 }
181}
182
183/* fwd decl */
184static void rds_ib_recv_cache_put(struct list_head *new_item,
185 struct rds_ib_refill_cache *cache);
186static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
187
188
189/* Recycle frag and attached recv buffer f_sg */
190static void rds_ib_frag_free(struct rds_ib_connection *ic,
191 struct rds_page_frag *frag)
192{
193 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
194
195 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
196}
197
198/* Recycle inc after freeing attached frags */
199void rds_ib_inc_free(struct rds_incoming *inc)
200{
201 struct rds_ib_incoming *ibinc;
202 struct rds_page_frag *frag;
203 struct rds_page_frag *pos;
204 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
205
206 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
207
208 /* Free attached frags */
209 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
210 list_del_init(&frag->f_item);
211 rds_ib_frag_free(ic, frag);
212 }
213 BUG_ON(!list_empty(&ibinc->ii_frags));
214
215 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
216 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
108} 217}
109 218
110static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, 219static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
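rds_ib_refill_cache and rds_ib_cache_head are used throughout this hunk but defined outside it (presumably in ib.h). From the accessors above, the likely shape is one staging chain per CPU plus two shared single-pointer chains:

#include <linux/list.h>
#include <linux/percpu.h>

/* Presumed definitions; the tree's ib.h is authoritative. */
struct rds_ib_cache_head {
        struct list_head *first;        /* this cpu's chain, NULL when empty */
        unsigned long count;            /* entries on this cpu's chain */
};

struct rds_ib_refill_cache {
        struct rds_ib_cache_head __percpu *percpu;
        struct list_head *xfer;         /* full batches parked by producers */
        struct list_head *ready;        /* chain being drained by the consumer */
};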
@@ -115,10 +224,8 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
115 recv->r_ibinc = NULL; 224 recv->r_ibinc = NULL;
116 } 225 }
117 if (recv->r_frag) { 226 if (recv->r_frag) {
118 rds_ib_recv_unmap_page(ic, recv); 227 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
119 if (recv->r_frag->f_page) 228 rds_ib_frag_free(ic, recv->r_frag);
120 rds_ib_frag_drop_page(recv->r_frag);
121 rds_ib_frag_free(recv->r_frag);
122 recv->r_frag = NULL; 229 recv->r_frag = NULL;
123 } 230 }
124} 231}
@@ -129,84 +236,111 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
129 236
130 for (i = 0; i < ic->i_recv_ring.w_nr; i++) 237 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
131 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); 238 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
132
133 if (ic->i_frag.f_page)
134 rds_ib_frag_drop_page(&ic->i_frag);
135} 239}
136 240
137static int rds_ib_recv_refill_one(struct rds_connection *conn, 241static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
138 struct rds_ib_recv_work *recv, 242 gfp_t slab_mask)
139 gfp_t kptr_gfp, gfp_t page_gfp)
140{ 243{
141 struct rds_ib_connection *ic = conn->c_transport_data; 244 struct rds_ib_incoming *ibinc;
142 dma_addr_t dma_addr; 245 struct list_head *cache_item;
143 struct ib_sge *sge; 246 int avail_allocs;
144 int ret = -ENOMEM;
145 247
146 if (recv->r_ibinc == NULL) { 248 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
147 if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) { 249 if (cache_item) {
250 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
251 } else {
252 avail_allocs = atomic_add_unless(&rds_ib_allocation,
253 1, rds_ib_sysctl_max_recv_allocation);
254 if (!avail_allocs) {
148 rds_ib_stats_inc(s_ib_rx_alloc_limit); 255 rds_ib_stats_inc(s_ib_rx_alloc_limit);
149 goto out; 256 return NULL;
150 } 257 }
151 recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, 258 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
152 kptr_gfp); 259 if (!ibinc) {
153 if (recv->r_ibinc == NULL) {
154 atomic_dec(&rds_ib_allocation); 260 atomic_dec(&rds_ib_allocation);
155 goto out; 261 return NULL;
156 } 262 }
157 INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
158 rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
159 } 263 }
264 INIT_LIST_HEAD(&ibinc->ii_frags);
265 rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
160 266
161 if (recv->r_frag == NULL) { 267 return ibinc;
162 recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp); 268}
163 if (recv->r_frag == NULL) 269
164 goto out; 270static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 271 gfp_t slab_mask, gfp_t page_mask)
166 recv->r_frag->f_page = NULL; 272{
273 struct rds_page_frag *frag;
274 struct list_head *cache_item;
275 int ret;
276
277 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
278 if (cache_item) {
279 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
280 } else {
281 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
282 if (!frag)
283 return NULL;
284
285 sg_init_table(&frag->f_sg, 1);
286 ret = rds_page_remainder_alloc(&frag->f_sg,
287 RDS_FRAG_SIZE, page_mask);
288 if (ret) {
289 kmem_cache_free(rds_ib_frag_slab, frag);
290 return NULL;
291 }
167 } 292 }
168 293
169 if (ic->i_frag.f_page == NULL) { 294 INIT_LIST_HEAD(&frag->f_item);
170 ic->i_frag.f_page = alloc_page(page_gfp); 295
171 if (ic->i_frag.f_page == NULL) 296 return frag;
172 goto out; 297}
173 ic->i_frag.f_offset = 0; 298
299static int rds_ib_recv_refill_one(struct rds_connection *conn,
300 struct rds_ib_recv_work *recv, int prefill)
301{
302 struct rds_ib_connection *ic = conn->c_transport_data;
303 struct ib_sge *sge;
304 int ret = -ENOMEM;
305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT;
307
308 if (prefill) {
309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER;
174 } 311 }
175 312
176 dma_addr = ib_dma_map_page(ic->i_cm_id->device, 313 if (!ic->i_cache_incs.ready)
177 ic->i_frag.f_page, 314 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
178 ic->i_frag.f_offset, 315 if (!ic->i_cache_frags.ready)
179 RDS_FRAG_SIZE, 316 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
180 DMA_FROM_DEVICE);
181 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
182 goto out;
183 317
184 /* 318 /*
185 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() 319 * ibinc was taken from recv if recv contained the start of a message.
186 * must be called on this recv. This happens as completions hit 320 * recvs that were continuations will still have this allocated.
187 * in order or on connection shutdown.
188 */ 321 */
189 recv->r_frag->f_page = ic->i_frag.f_page; 322 if (!recv->r_ibinc) {
190 recv->r_frag->f_offset = ic->i_frag.f_offset; 323 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
191 recv->r_frag->f_mapped = dma_addr; 324 if (!recv->r_ibinc)
325 goto out;
326 }
192 327
193 sge = rds_ib_data_sge(ic, recv->r_sge); 328 WARN_ON(recv->r_frag); /* leak! */
194 sge->addr = dma_addr; 329 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
195 sge->length = RDS_FRAG_SIZE; 330 if (!recv->r_frag)
331 goto out;
332
333 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
334 1, DMA_FROM_DEVICE);
335 WARN_ON(ret != 1);
196 336
197 sge = rds_ib_header_sge(ic, recv->r_sge); 337 sge = &recv->r_sge[0];
198 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); 338 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
199 sge->length = sizeof(struct rds_header); 339 sge->length = sizeof(struct rds_header);
200 340
201 get_page(recv->r_frag->f_page); 341 sge = &recv->r_sge[1];
202 342 sge->addr = sg_dma_address(&recv->r_frag->f_sg);
203 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { 343 sge->length = sg_dma_len(&recv->r_frag->f_sg);
204 ic->i_frag.f_offset += RDS_FRAG_SIZE;
205 } else {
206 put_page(ic->i_frag.f_page);
207 ic->i_frag.f_page = NULL;
208 ic->i_frag.f_offset = 0;
209 }
210 344
211 ret = 0; 345 ret = 0;
212out: 346out:
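The prefill flag replaces the old kptr_gfp/page_gfp arguments: the initial fill at connect time may sleep, but steady-state refills run from completion context and must not. A toy illustration of the same flag-to-gfp mapping:

#include <linux/slab.h>
#include <linux/types.h>

/* Toy sketch: choose allocation flags from calling context. */
static void *refill_alloc(size_t len, bool prefill)
{
        /* GFP_NOWAIT fails fast instead of sleeping in softirq context */
        gfp_t gfp = prefill ? GFP_KERNEL : GFP_NOWAIT;

        return kmalloc(len, gfp);
}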
@@ -216,13 +350,11 @@ out:
216/* 350/*
217 * This tries to allocate and post unused work requests after making sure that 351 * This tries to allocate and post unused work requests after making sure that
218 * they have all the allocations they need to queue received fragments into 352 * they have all the allocations they need to queue received fragments into
219 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc 353 * sockets.
220 * pairs don't go unmatched.
221 * 354 *
222 * -1 is returned if posting fails due to temporary resource exhaustion. 355 * -1 is returned if posting fails due to temporary resource exhaustion.
223 */ 356 */
224int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 357void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
225 gfp_t page_gfp, int prefill)
226{ 358{
227 struct rds_ib_connection *ic = conn->c_transport_data; 359 struct rds_ib_connection *ic = conn->c_transport_data;
228 struct rds_ib_recv_work *recv; 360 struct rds_ib_recv_work *recv;
@@ -236,28 +368,25 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
236 if (pos >= ic->i_recv_ring.w_nr) { 368 if (pos >= ic->i_recv_ring.w_nr) {
237 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", 369 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
238 pos); 370 pos);
239 ret = -EINVAL;
240 break; 371 break;
241 } 372 }
242 373
243 recv = &ic->i_recvs[pos]; 374 recv = &ic->i_recvs[pos];
244 ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp); 375 ret = rds_ib_recv_refill_one(conn, recv, prefill);
245 if (ret) { 376 if (ret) {
246 ret = -1;
247 break; 377 break;
248 } 378 }
249 379
250 /* XXX when can this fail? */ 380 /* XXX when can this fail? */
251 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); 381 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
252 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, 382 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
253 recv->r_ibinc, recv->r_frag->f_page, 383 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
254 (long) recv->r_frag->f_mapped, ret); 384 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
255 if (ret) { 385 if (ret) {
256 rds_ib_conn_error(conn, "recv post on " 386 rds_ib_conn_error(conn, "recv post on "
257 "%pI4 returned %d, disconnecting and " 387 "%pI4 returned %d, disconnecting and "
258 "reconnecting\n", &conn->c_faddr, 388 "reconnecting\n", &conn->c_faddr,
259 ret); 389 ret);
260 ret = -1;
261 break; 390 break;
262 } 391 }
263 392
@@ -270,37 +399,73 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
270 399
271 if (ret) 400 if (ret)
272 rds_ib_ring_unalloc(&ic->i_recv_ring, 1); 401 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
273 return ret;
274} 402}
275 403
276void rds_ib_inc_purge(struct rds_incoming *inc) 404/*
405 * We want to recycle several types of recv allocations, like incs and frags.
406 * To use this, the *_free() function passes in the ptr to a list_head within
407 * the recyclee, as well as the cache to put it on.
408 *
409 * First, we put the memory on a percpu list. When this reaches a certain size,
410 * we move it to an intermediate non-percpu list in a lockless manner, with some
411 * xchg/cmpxchg wizardry.
412 *
413 * N.B. Instead of a list_head as the anchor, we use a single pointer, which can
414 * be NULL and xchg'd. The list is actually empty when the pointer is NULL, and
415 * list_empty() will return true when one element is actually present.
416 */
417static void rds_ib_recv_cache_put(struct list_head *new_item,
418 struct rds_ib_refill_cache *cache)
277{ 419{
278 struct rds_ib_incoming *ibinc; 420 unsigned long flags;
279 struct rds_page_frag *frag; 421 struct rds_ib_cache_head *chp;
280 struct rds_page_frag *pos; 422 struct list_head *old;
281 423
282 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 424 local_irq_save(flags);
283 rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
284 425
285 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { 426 chp = per_cpu_ptr(cache->percpu, smp_processor_id());
286 list_del_init(&frag->f_item); 427 if (!chp->first)
287 rds_ib_frag_drop_page(frag); 428 INIT_LIST_HEAD(new_item);
288 rds_ib_frag_free(frag); 429 else /* put on front */
289 } 430 list_add_tail(new_item, chp->first);
431 chp->first = new_item;
432 chp->count++;
433
434 if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
435 goto end;
436
437 /*
438 * Return our per-cpu first list to the cache's xfer by atomically
439 * grabbing the current xfer list, appending it to our per-cpu list,
440 * and then atomically returning that entire list back to the
441 * cache's xfer list as long as it's still empty.
442 */
443 do {
444 old = xchg(&cache->xfer, NULL);
445 if (old)
446 list_splice_entire_tail(old, chp->first);
447 old = cmpxchg(&cache->xfer, NULL, chp->first);
448 } while (old);
449
450 chp->first = NULL;
451 chp->count = 0;
452end:
453 local_irq_restore(flags);
290} 454}
291 455
292void rds_ib_inc_free(struct rds_incoming *inc) 456static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
293{ 457{
294 struct rds_ib_incoming *ibinc; 458 struct list_head *head = cache->ready;
295 459
296 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 460 if (head) {
461 if (!list_empty(head)) {
462 cache->ready = head->next;
463 list_del_init(head);
464 } else
465 cache->ready = NULL;
466 }
297 467
298 rds_ib_inc_purge(inc); 468 return head;
299 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
300 BUG_ON(!list_empty(&ibinc->ii_frags));
301 kmem_cache_free(rds_ib_incoming_slab, ibinc);
302 atomic_dec(&rds_ib_allocation);
303 BUG_ON(atomic_read(&rds_ib_allocation) < 0);
304} 469}
305 470
306int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, 471int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
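The xchg/cmpxchg pair in rds_ib_recv_cache_put() is the delicate part of the cache: a producer has to merge its batch with anything another CPU parked on cache->xfer in the meantime, retrying if xfer was refilled between the two atomics. The loop in isolation, annotated (same logic as above):

do {
        /* 1) take ownership of whatever is parked; leaves xfer NULL */
        old = xchg(&cache->xfer, NULL);
        /* 2) fold it into our private chain; nobody else sees chp->first */
        if (old)
                list_splice_entire_tail(old, chp->first);
        /* 3) publish the merged chain only if xfer is still NULL;
         *    otherwise another cpu parked a batch meanwhile - loop
         *    and merge that one too */
        old = cmpxchg(&cache->xfer, NULL, chp->first);
} while (old);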
@@ -336,13 +501,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
336 to_copy = min_t(unsigned long, to_copy, len - copied); 501 to_copy = min_t(unsigned long, to_copy, len - copied);
337 502
338 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " 503 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
339 "[%p, %lu] + %lu\n", 504 "[%p, %u] + %lu\n",
340 to_copy, iov->iov_base, iov->iov_len, iov_off, 505 to_copy, iov->iov_base, iov->iov_len, iov_off,
341 frag->f_page, frag->f_offset, frag_off); 506 sg_page(&frag->f_sg), frag->f_sg.offset, frag_off);
342 507
343 /* XXX needs + offset for multiple recvs per page */ 508 /* XXX needs + offset for multiple recvs per page */
344 ret = rds_page_copy_to_user(frag->f_page, 509 ret = rds_page_copy_to_user(sg_page(&frag->f_sg),
345 frag->f_offset + frag_off, 510 frag->f_sg.offset + frag_off,
346 iov->iov_base + iov_off, 511 iov->iov_base + iov_off,
347 to_copy); 512 to_copy);
348 if (ret) { 513 if (ret) {
@@ -557,47 +722,6 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
557 return rds_ib_get_ack(ic); 722 return rds_ib_get_ack(ic);
558} 723}
559 724
560static struct rds_header *rds_ib_get_header(struct rds_connection *conn,
561 struct rds_ib_recv_work *recv,
562 u32 data_len)
563{
564 struct rds_ib_connection *ic = conn->c_transport_data;
565 void *hdr_buff = &ic->i_recv_hdrs[recv - ic->i_recvs];
566 void *addr;
567 u32 misplaced_hdr_bytes;
568
569 /*
570 * Support header at the front (RDS 3.1+) as well as header-at-end.
571 *
572 * Cases:
573 * 1) header all in header buff (great!)
574 * 2) header all in data page (copy all to header buff)
575 * 3) header split across hdr buf + data page
576 * (move bit in hdr buff to end before copying other bit from data page)
577 */
578 if (conn->c_version > RDS_PROTOCOL_3_0 || data_len == RDS_FRAG_SIZE)
579 return hdr_buff;
580
581 if (data_len <= (RDS_FRAG_SIZE - sizeof(struct rds_header))) {
582 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
583 memcpy(hdr_buff,
584 addr + recv->r_frag->f_offset + data_len,
585 sizeof(struct rds_header));
586 kunmap_atomic(addr, KM_SOFTIRQ0);
587 return hdr_buff;
588 }
589
590 misplaced_hdr_bytes = (sizeof(struct rds_header) - (RDS_FRAG_SIZE - data_len));
591
592 memmove(hdr_buff + misplaced_hdr_bytes, hdr_buff, misplaced_hdr_bytes);
593
594 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
595 memcpy(hdr_buff, addr + recv->r_frag->f_offset + data_len,
596 sizeof(struct rds_header) - misplaced_hdr_bytes);
597 kunmap_atomic(addr, KM_SOFTIRQ0);
598 return hdr_buff;
599}
600
601/* 725/*
602 * It's kind of lame that we're copying from the posted receive pages into 726 * It's kind of lame that we're copying from the posted receive pages into
603 * long-lived bitmaps. We could have posted the bitmaps and rdma written into 727 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
@@ -639,7 +763,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
639 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); 763 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
640 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ 764 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
641 765
642 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); 766 addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0);
643 767
644 src = addr + frag_off; 768 src = addr + frag_off;
645 dst = (void *)map->m_page_addrs[map_page] + map_off; 769 dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -710,7 +834,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
710 } 834 }
711 data_len -= sizeof(struct rds_header); 835 data_len -= sizeof(struct rds_header);
712 836
713 ihdr = rds_ib_get_header(conn, recv, data_len); 837 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
714 838
715 /* Validate the checksum. */ 839 /* Validate the checksum. */
716 if (!rds_message_verify_checksum(ihdr)) { 840 if (!rds_message_verify_checksum(ihdr)) {
@@ -742,12 +866,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
742 * the inc is freed. We don't go that route, so we have to drop the 866 * the inc is freed. We don't go that route, so we have to drop the
743 * page ref ourselves. We can't just leave the page on the recv 867 * page ref ourselves. We can't just leave the page on the recv
744 * because that confuses the dma mapping of pages and each recv's use 868 * because that confuses the dma mapping of pages and each recv's use
745 * of a partial page. We can leave the frag, though, it will be 869 * of a partial page.
746 * reused.
747 * 870 *
748 * FIXME: Fold this into the code path below. 871 * FIXME: Fold this into the code path below.
749 */ 872 */
750 rds_ib_frag_drop_page(recv->r_frag); 873 rds_ib_frag_free(ic, recv->r_frag);
874 recv->r_frag = NULL;
751 return; 875 return;
752 } 876 }
753 877
@@ -757,7 +881,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
757 * into the inc and save the inc so we can hang upcoming fragments 881 * into the inc and save the inc so we can hang upcoming fragments
758 * off its list. 882 * off its list.
759 */ 883 */
760 if (ibinc == NULL) { 884 if (!ibinc) {
761 ibinc = recv->r_ibinc; 885 ibinc = recv->r_ibinc;
762 recv->r_ibinc = NULL; 886 recv->r_ibinc = NULL;
763 ic->i_ibinc = ibinc; 887 ic->i_ibinc = ibinc;
@@ -842,32 +966,38 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
842 struct rds_ib_recv_work *recv; 966 struct rds_ib_recv_work *recv;
843 967
844 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { 968 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
845 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 969 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
846 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 970 (unsigned long long)wc.wr_id, wc.status,
971 rds_ib_wc_status_str(wc.status), wc.byte_len,
847 be32_to_cpu(wc.ex.imm_data)); 972 be32_to_cpu(wc.ex.imm_data));
848 rds_ib_stats_inc(s_ib_rx_cq_event); 973 rds_ib_stats_inc(s_ib_rx_cq_event);
849 974
850 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; 975 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
851 976
852 rds_ib_recv_unmap_page(ic, recv); 977 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
853 978
854 /* 979 /*
855 * Also process recvs in connecting state because it is possible 980 * Also process recvs in connecting state because it is possible
856 * to get a recv completion _before_ the rdmacm ESTABLISHED 981 * to get a recv completion _before_ the rdmacm ESTABLISHED
857 * event is processed. 982 * event is processed.
858 */ 983 */
859 if (rds_conn_up(conn) || rds_conn_connecting(conn)) { 984 if (wc.status == IB_WC_SUCCESS) {
985 rds_ib_process_recv(conn, recv, wc.byte_len, state);
986 } else {
860 /* We expect errors as the qp is drained during shutdown */ 987 /* We expect errors as the qp is drained during shutdown */
861 if (wc.status == IB_WC_SUCCESS) { 988 if (rds_conn_up(conn) || rds_conn_connecting(conn))
862 rds_ib_process_recv(conn, recv, wc.byte_len, state); 989 rds_ib_conn_error(conn, "recv completion on %pI4 had "
863 } else { 990 "status %u (%s), disconnecting and "
864 rds_ib_conn_error(conn, "recv completion on " 991 "reconnecting\n", &conn->c_faddr,
865 "%pI4 had status %u, disconnecting and " 992 wc.status,
866 "reconnecting\n", &conn->c_faddr, 993 rds_ib_wc_status_str(wc.status));
867 wc.status);
868 }
869 } 994 }
870 995
996 /*
997 * It's very important that we only free this ring entry if we've truly
998 * freed the resources allocated to the entry. The refilling path can
999 * leak if we don't.
1000 */
871 rds_ib_ring_free(&ic->i_recv_ring, 1); 1001 rds_ib_ring_free(&ic->i_recv_ring, 1);
872 } 1002 }
873} 1003}
@@ -897,11 +1027,8 @@ void rds_ib_recv_tasklet_fn(unsigned long data)
897 if (rds_ib_ring_empty(&ic->i_recv_ring)) 1027 if (rds_ib_ring_empty(&ic->i_recv_ring))
898 rds_ib_stats_inc(s_ib_rx_ring_empty); 1028 rds_ib_stats_inc(s_ib_rx_ring_empty);
899 1029
900 /*
901 * If the ring is running low, then schedule the thread to refill.
902 */
903 if (rds_ib_ring_low(&ic->i_recv_ring)) 1030 if (rds_ib_ring_low(&ic->i_recv_ring))
904 queue_delayed_work(rds_wq, &conn->c_recv_w, 0); 1031 rds_ib_recv_refill(conn, 0);
905} 1032}
906 1033
907int rds_ib_recv(struct rds_connection *conn) 1034int rds_ib_recv(struct rds_connection *conn)
@@ -910,25 +1037,13 @@ int rds_ib_recv(struct rds_connection *conn)
910 int ret = 0; 1037 int ret = 0;
911 1038
912 rdsdebug("conn %p\n", conn); 1039 rdsdebug("conn %p\n", conn);
913
914 /*
915 * If we get a temporary posting failure in this context then
916 * we're really low and we want the caller to back off for a bit.
917 */
918 mutex_lock(&ic->i_recv_mutex);
919 if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
920 ret = -ENOMEM;
921 else
922 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
923 mutex_unlock(&ic->i_recv_mutex);
924
925 if (rds_conn_up(conn)) 1040 if (rds_conn_up(conn))
926 rds_ib_attempt_ack(ic); 1041 rds_ib_attempt_ack(ic);
927 1042
928 return ret; 1043 return ret;
929} 1044}
930 1045
931int __init rds_ib_recv_init(void) 1046int rds_ib_recv_init(void)
932{ 1047{
933 struct sysinfo si; 1048 struct sysinfo si;
934 int ret = -ENOMEM; 1049 int ret = -ENOMEM;
@@ -939,14 +1054,14 @@ int __init rds_ib_recv_init(void)
939 1054
940 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", 1055 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
941 sizeof(struct rds_ib_incoming), 1056 sizeof(struct rds_ib_incoming),
942 0, 0, NULL); 1057 0, SLAB_HWCACHE_ALIGN, NULL);
943 if (rds_ib_incoming_slab == NULL) 1058 if (!rds_ib_incoming_slab)
944 goto out; 1059 goto out;
945 1060
946 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", 1061 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
947 sizeof(struct rds_page_frag), 1062 sizeof(struct rds_page_frag),
948 0, 0, NULL); 1063 0, SLAB_HWCACHE_ALIGN, NULL);
949 if (rds_ib_frag_slab == NULL) 1064 if (!rds_ib_frag_slab)
950 kmem_cache_destroy(rds_ib_incoming_slab); 1065 kmem_cache_destroy(rds_ib_incoming_slab);
951 else 1066 else
952 ret = 0; 1067 ret = 0;
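
Both slabs above move from flags 0 to SLAB_HWCACHE_ALIGN, which pads each object to a cache-line boundary so incs and frags handled on different CPUs never share a line. A minimal sketch of the same call shape, with a hypothetical descriptor type:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>

/* Hypothetical hot RX descriptor, only to show the call shape. */
struct rx_desc {
        u64     cookie;
        u32     len;
};

static struct kmem_cache *rx_desc_slab;

static int __init rx_desc_cache_init(void)
{
        /* align = 0 lets the flag choose the alignment; no constructor. */
        rx_desc_slab = kmem_cache_create("rx_desc", sizeof(struct rx_desc),
                                         0, SLAB_HWCACHE_ALIGN, NULL);
        return rx_desc_slab ? 0 : -ENOMEM;
}

The trade-off is straightforward: a little wasted memory per object in exchange for no false sharing on the hot receive path.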
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 17fa80803ab0..71f373c421bc 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -36,11 +36,49 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "ib.h" 39#include "ib.h"
41 40
42static void rds_ib_send_rdma_complete(struct rds_message *rm, 41static char *rds_ib_wc_status_strings[] = {
43 int wc_status) 42#define RDS_IB_WC_STATUS_STR(foo) \
43 [IB_WC_##foo] = __stringify(IB_WC_##foo)
44 RDS_IB_WC_STATUS_STR(SUCCESS),
45 RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
46 RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
47 RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
48 RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
49 RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
50 RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
51 RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
52 RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
53 RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
54 RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
55 RDS_IB_WC_STATUS_STR(REM_OP_ERR),
56 RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
57 RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
58 RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
59 RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
60 RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
61 RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
62 RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
63 RDS_IB_WC_STATUS_STR(FATAL_ERR),
64 RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
65 RDS_IB_WC_STATUS_STR(GENERAL_ERR),
66#undef RDS_IB_WC_STATUS_STR
67};
68
69char *rds_ib_wc_status_str(enum ib_wc_status status)
70{
71 return rds_str_array(rds_ib_wc_status_strings,
72 ARRAY_SIZE(rds_ib_wc_status_strings), status);
73}
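
rds_str_array() itself is not part of this hunk, so the following is an assumption about its behavior rather than a quote of it: a bounds-checked table lookup with a constant fallback, so a status value outside the (sparse) table can never index past its end.

#include <stddef.h>

/* Presumed shape of the lookup (assumption, not the RDS source). */
static char *str_array(char **array, size_t elements, size_t index)
{
        if (index < elements && array[index])
                return array[index];
        return "unknown";
}

The array[index] check matters as much as the bounds check: a designated-initializer table like the one above leaves any unnamed slots NULL.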
74
75/*
76 * Convert IB-specific error message to RDS error message and call core
77 * completion handler.
78 */
79static void rds_ib_send_complete(struct rds_message *rm,
80 int wc_status,
81 void (*complete)(struct rds_message *rm, int status))
44{ 82{
45 int notify_status; 83 int notify_status;
46 84
@@ -60,69 +98,125 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
60 notify_status = RDS_RDMA_OTHER_ERROR; 98 notify_status = RDS_RDMA_OTHER_ERROR;
61 break; 99 break;
62 } 100 }
63 rds_rdma_send_complete(rm, notify_status); 101 complete(rm, notify_status);
102}
103
104static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
105 struct rm_data_op *op,
106 int wc_status)
107{
108 if (op->op_nents)
109 ib_dma_unmap_sg(ic->i_cm_id->device,
110 op->op_sg, op->op_nents,
111 DMA_TO_DEVICE);
64} 112}
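
Where the old rds_ib_send_rdma_complete() hardcoded rds_rdma_send_complete(), the new rds_ib_send_complete() keeps only the wc-status-to-notify mapping and takes the per-type completion as a function pointer, so the data, RDMA, and atomic paths can share it. A userspace model of that shape, with hypothetical names and a stand-in status mapping:

/* completion_dispatch.c — model of the callback refactor above. */
#include <stdio.h>

struct message { int id; };

static void rdma_complete(struct message *m, int status)
{
        printf("rdma   msg %d -> notify %d\n", m->id, status);
}

static void atomic_complete(struct message *m, int status)
{
        printf("atomic msg %d -> notify %d\n", m->id, status);
}

/* One generic mapping step, fanned out through the callback. */
static void send_complete(struct message *m, int wc_status,
                          void (*complete)(struct message *, int))
{
        int notify = (wc_status == 0) ? 0 : -1; /* stand-in mapping */

        complete(m, notify);
}

int main(void)
{
        struct message m = { .id = 1 };

        send_complete(&m, 0, rdma_complete);    /* success path */
        send_complete(&m, 5, atomic_complete);  /* error path */
        return 0;
}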
65 113
66static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, 114static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
67 struct rds_rdma_op *op) 115 struct rm_rdma_op *op,
116 int wc_status)
68{ 117{
69 if (op->r_mapped) { 118 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 119 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 120 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 121 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 122 op->op_mapped = 0;
74 } 123 }
124
125 /* If the user asked for a completion notification on this
126 * message, we can implement three different semantics:
127 * 1. Notify when we received the ACK on the RDS message
128 * that was queued with the RDMA. This provides reliable
129 * notification of RDMA status at the expense of a one-way
130 * packet delay.
131 * 2. Notify when the IB stack gives us the completion event for
132 * the RDMA operation.
133 * 3. Notify when the IB stack gives us the completion event for
134 * the accompanying RDS messages.
135 * Here, we implement approach #3. To implement approach #2,
136 * we would need to take an event for the rdma WR. To implement #1,
137 * don't call rds_rdma_send_complete at all, and fall back to the notify
138 * handling in the ACK processing code.
139 *
140 * Note: There's no need to explicitly sync any RDMA buffers using
141 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
142 * operation itself unmapped the RDMA buffers, which takes care
143 * of synching.
144 */
145 rds_ib_send_complete(container_of(op, struct rds_message, rdma),
146 wc_status, rds_rdma_send_complete);
147
148 if (op->op_write)
149 rds_stats_add(s_send_rdma_bytes, op->op_bytes);
150 else
151 rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
75} 152}
76 153
77static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, 154static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
78 struct rds_ib_send_work *send, 155 struct rm_atomic_op *op,
79 int wc_status) 156 int wc_status)
80{ 157{
81 struct rds_message *rm = send->s_rm; 158 /* unmap atomic recvbuf */
82 159 if (op->op_mapped) {
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 160 ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
84 161 DMA_FROM_DEVICE);
85 ib_dma_unmap_sg(ic->i_cm_id->device, 162 op->op_mapped = 0;
86 rm->m_sg, rm->m_nents, 163 }
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_ib_send_rdma_complete(rm, wc_status);
113 164
114 if (rm->m_rdma_op->r_write) 165 rds_ib_send_complete(container_of(op, struct rds_message, atomic),
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 166 wc_status, rds_atomic_send_complete);
116 else 167
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 168 if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
169 rds_ib_stats_inc(s_ib_atomic_cswp);
170 else
171 rds_ib_stats_inc(s_ib_atomic_fadd);
172}
173
174/*
175 * Unmap the resources associated with a struct send_work.
176 *
 177 * Returns the rm because the caller, the event handler, needs it,
 178 * and currently the only way to recover the rm is by switching
 179 * on wr.opcode.
180 */
181static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
182 struct rds_ib_send_work *send,
183 int wc_status)
184{
185 struct rds_message *rm = NULL;
186
187 /* In the error case, wc.opcode sometimes contains garbage */
188 switch (send->s_wr.opcode) {
189 case IB_WR_SEND:
190 if (send->s_op) {
191 rm = container_of(send->s_op, struct rds_message, data);
192 rds_ib_send_unmap_data(ic, send->s_op, wc_status);
193 }
194 break;
195 case IB_WR_RDMA_WRITE:
196 case IB_WR_RDMA_READ:
197 if (send->s_op) {
198 rm = container_of(send->s_op, struct rds_message, rdma);
199 rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
200 }
201 break;
202 case IB_WR_ATOMIC_FETCH_AND_ADD:
203 case IB_WR_ATOMIC_CMP_AND_SWP:
204 if (send->s_op) {
205 rm = container_of(send->s_op, struct rds_message, atomic);
206 rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
207 }
208 break;
209 default:
210 if (printk_ratelimit())
211 printk(KERN_NOTICE
212 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
213 __func__, send->s_wr.opcode);
214 break;
118 } 215 }
119 216
120 /* If anyone waited for this message to get flushed out, wake 217 send->s_wr.opcode = 0xdead;
121 * them up now */
122 rds_message_unmapped(rm);
123 218
124 rds_message_put(rm); 219 return rm;
125 send->s_rm = NULL;
126} 220}
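
The switch above is needed because a send_work only remembers a pointer to the embedded op (s_op); recovering the enclosing rds_message takes container_of(), and the member offset to subtract depends on which member s_op actually points at, which only the opcode reveals. A self-contained userspace illustration of that recovery, re-implementing the macro with hypothetical types:

/* container_of.c — compiles and runs in userspace. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct rdma_op { int rkey; };           /* hypothetical stand-in */

struct message {
        int             id;
        struct rdma_op  rdma;           /* embedded op, like rm->rdma */
};

int main(void)
{
        struct message msg = { .id = 7 };
        struct rdma_op *op = &msg.rdma; /* all the send_work keeps */

        /* Subtract the member offset to get back to the message. */
        struct message *rm = container_of(op, struct message, rdma);

        printf("recovered msg id %d\n", rm->id);        /* prints 7 */
        return 0;
}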
127 221
128void rds_ib_send_init_ring(struct rds_ib_connection *ic) 222void rds_ib_send_init_ring(struct rds_ib_connection *ic)
@@ -133,23 +227,18 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 227 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge; 228 struct ib_sge *sge;
135 229
136 send->s_rm = NULL;
137 send->s_op = NULL; 230 send->s_op = NULL;
138 231
139 send->s_wr.wr_id = i; 232 send->s_wr.wr_id = i;
140 send->s_wr.sg_list = send->s_sge; 233 send->s_wr.sg_list = send->s_sge;
141 send->s_wr.num_sge = 1;
142 send->s_wr.opcode = IB_WR_SEND;
143 send->s_wr.send_flags = 0;
144 send->s_wr.ex.imm_data = 0; 234 send->s_wr.ex.imm_data = 0;
145 235
146 sge = rds_ib_data_sge(ic, send->s_sge); 236 sge = &send->s_sge[0];
147 sge->lkey = ic->i_mr->lkey;
148
149 sge = rds_ib_header_sge(ic, send->s_sge);
150 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); 237 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
151 sge->length = sizeof(struct rds_header); 238 sge->length = sizeof(struct rds_header);
152 sge->lkey = ic->i_mr->lkey; 239 sge->lkey = ic->i_mr->lkey;
240
241 send->s_sge[1].lkey = ic->i_mr->lkey;
153 } 242 }
154} 243}
155 244
@@ -159,16 +248,24 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
159 u32 i; 248 u32 i;
160 249
161 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 250 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
162 if (send->s_wr.opcode == 0xdead) 251 if (send->s_op && send->s_wr.opcode != 0xdead)
163 continue; 252 rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
164 if (send->s_rm)
165 rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
166 if (send->s_op)
167 rds_ib_send_unmap_rdma(ic, send->s_op);
168 } 253 }
169} 254}
170 255
171/* 256/*
257 * The only fast path caller always has a non-zero nr, so we don't
258 * bother testing nr before performing the atomic sub.
259 */
260static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
261{
262 if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
263 waitqueue_active(&rds_ib_ring_empty_wait))
264 wake_up(&rds_ib_ring_empty_wait);
265 BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
266}
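
rds_ib_sub_signaled() pairs with the atomic_add(nr_sig, &ic->i_signaled_sends) calls on the posting paths below: together they count outstanding signaled WRs so the shutdown path can wait for the rings to truly drain. A small C11 model of the accounting, with a flag standing in for wake_up():

/* signaled_count.c — model of the i_signaled_sends accounting. */
#include <assert.h>
#include <stdatomic.h>

static _Atomic int signaled_sends;
static int drained;                     /* stands in for wake_up() */

/* Post side: count every WR posted with IB_SEND_SIGNALED set. */
static void add_signaled(int nr)
{
        atomic_fetch_add(&signaled_sends, nr);
}

/* Completion side: drop the count; the completion that reaches
 * zero "wakes" whoever is waiting for the ring to empty. */
static void sub_signaled(int nr)
{
        if (atomic_fetch_sub(&signaled_sends, nr) - nr == 0)
                drained = 1;
        assert(atomic_load(&signaled_sends) >= 0);
}

int main(void)
{
        add_signaled(2);
        sub_signaled(1);
        sub_signaled(1);
        return drained ? 0 : 1; /* drained after the second completion */
}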
267
268/*
172 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc 269 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
173 * operations performed in the send path. As the sender allocs and potentially 270 * operations performed in the send path. As the sender allocs and potentially
174 * unallocs the next free entry in the ring it doesn't alter which is 271 * unallocs the next free entry in the ring it doesn't alter which is
@@ -178,12 +275,14 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
178{ 275{
179 struct rds_connection *conn = context; 276 struct rds_connection *conn = context;
180 struct rds_ib_connection *ic = conn->c_transport_data; 277 struct rds_ib_connection *ic = conn->c_transport_data;
278 struct rds_message *rm = NULL;
181 struct ib_wc wc; 279 struct ib_wc wc;
182 struct rds_ib_send_work *send; 280 struct rds_ib_send_work *send;
183 u32 completed; 281 u32 completed;
184 u32 oldest; 282 u32 oldest;
185 u32 i = 0; 283 u32 i = 0;
186 int ret; 284 int ret;
285 int nr_sig = 0;
187 286
188 rdsdebug("cq %p conn %p\n", cq, conn); 287 rdsdebug("cq %p conn %p\n", cq, conn);
189 rds_ib_stats_inc(s_ib_tx_cq_call); 288 rds_ib_stats_inc(s_ib_tx_cq_call);
@@ -192,8 +291,9 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
192 rdsdebug("ib_req_notify_cq send failed: %d\n", ret); 291 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
193 292
194 while (ib_poll_cq(cq, 1, &wc) > 0) { 293 while (ib_poll_cq(cq, 1, &wc) > 0) {
195 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 294 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
196 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 295 (unsigned long long)wc.wr_id, wc.status,
296 rds_ib_wc_status_str(wc.status), wc.byte_len,
197 be32_to_cpu(wc.ex.imm_data)); 297 be32_to_cpu(wc.ex.imm_data));
198 rds_ib_stats_inc(s_ib_tx_cq_event); 298 rds_ib_stats_inc(s_ib_tx_cq_event);
199 299
@@ -210,51 +310,30 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
210 310
211 for (i = 0; i < completed; i++) { 311 for (i = 0; i < completed; i++) {
212 send = &ic->i_sends[oldest]; 312 send = &ic->i_sends[oldest];
313 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
314 nr_sig++;
213 315
214 /* In the error case, wc.opcode sometimes contains garbage */ 316 rm = rds_ib_send_unmap_op(ic, send, wc.status);
215 switch (send->s_wr.opcode) {
216 case IB_WR_SEND:
217 if (send->s_rm)
218 rds_ib_send_unmap_rm(ic, send, wc.status);
219 break;
220 case IB_WR_RDMA_WRITE:
221 case IB_WR_RDMA_READ:
222 /* Nothing to be done - the SG list will be unmapped
223 * when the SEND completes. */
224 break;
225 default:
226 if (printk_ratelimit())
227 printk(KERN_NOTICE
228 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
229 __func__, send->s_wr.opcode);
230 break;
231 }
232 317
233 send->s_wr.opcode = 0xdead;
234 send->s_wr.num_sge = 1;
235 if (send->s_queued + HZ/2 < jiffies) 318 if (send->s_queued + HZ/2 < jiffies)
236 rds_ib_stats_inc(s_ib_tx_stalled); 319 rds_ib_stats_inc(s_ib_tx_stalled);
237 320
238 /* If a RDMA operation produced an error, signal this right 321 if (send->s_op) {
239 * away. If we don't, the subsequent SEND that goes with this 322 if (send->s_op == rm->m_final_op) {
240 * RDMA will be canceled with ERR_WFLUSH, and the application 323 /* If anyone waited for this message to get flushed out, wake
241 * never learn that the RDMA failed. */ 324 * them up now */
242 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { 325 rds_message_unmapped(rm);
243 struct rds_message *rm;
244
245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm) {
247 if (rm->m_rdma_op)
248 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
249 rds_ib_send_rdma_complete(rm, wc.status);
250 rds_message_put(rm);
251 } 326 }
327 rds_message_put(rm);
328 send->s_op = NULL;
252 } 329 }
253 330
254 oldest = (oldest + 1) % ic->i_send_ring.w_nr; 331 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
255 } 332 }
256 333
257 rds_ib_ring_free(&ic->i_send_ring, completed); 334 rds_ib_ring_free(&ic->i_send_ring, completed);
335 rds_ib_sub_signaled(ic, nr_sig);
336 nr_sig = 0;
258 337
259 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 338 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
260 test_bit(0, &conn->c_map_queued)) 339 test_bit(0, &conn->c_map_queued))
@@ -262,10 +341,10 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
262 341
263 /* We expect errors as the qp is drained during shutdown */ 342 /* We expect errors as the qp is drained during shutdown */
264 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { 343 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
265 rds_ib_conn_error(conn, 344 rds_ib_conn_error(conn, "send completion on %pI4 had status "
266 "send completion on %pI4 " 345 "%u (%s), disconnecting and reconnecting\n",
267 "had status %u, disconnecting and reconnecting\n", 346 &conn->c_faddr, wc.status,
268 &conn->c_faddr, wc.status); 347 rds_ib_wc_status_str(wc.status));
269 } 348 }
270 } 349 }
271} 350}
@@ -294,7 +373,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
294 * credits (see rds_ib_send_add_credits below). 373 * credits (see rds_ib_send_add_credits below).
295 * 374 *
296 * The RDS send code is essentially single-threaded; rds_send_xmit 375 * The RDS send code is essentially single-threaded; rds_send_xmit
297 * grabs c_send_lock to ensure exclusive access to the send ring. 376 * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
298 * However, the ACK sending code is independent and can race with 377 * However, the ACK sending code is independent and can race with
299 * message SENDs. 378 * message SENDs.
300 * 379 *
@@ -413,40 +492,21 @@ void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
413 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 492 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
414} 493}
415 494
416static inline void 495static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
417rds_ib_xmit_populate_wr(struct rds_ib_connection *ic, 496 struct rds_ib_send_work *send,
418 struct rds_ib_send_work *send, unsigned int pos, 497 bool notify)
419 unsigned long buffer, unsigned int length,
420 int send_flags)
421{ 498{
422 struct ib_sge *sge; 499 /*
423 500 * We want to delay signaling completions just enough to get
424 WARN_ON(pos != send - ic->i_sends); 501 * the batching benefits but not so much that we create dead time
425 502 * on the wire.
426 send->s_wr.send_flags = send_flags; 503 */
427 send->s_wr.opcode = IB_WR_SEND; 504 if (ic->i_unsignaled_wrs-- == 0 || notify) {
428 send->s_wr.num_sge = 2; 505 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
429 send->s_wr.next = NULL; 506 send->s_wr.send_flags |= IB_SEND_SIGNALED;
430 send->s_queued = jiffies; 507 return 1;
431 send->s_op = NULL;
432
433 if (length != 0) {
434 sge = rds_ib_data_sge(ic, send->s_sge);
435 sge->addr = buffer;
436 sge->length = length;
437 sge->lkey = ic->i_mr->lkey;
438
439 sge = rds_ib_header_sge(ic, send->s_sge);
440 } else {
441 /* We're sending a packet with no payload. There is only
442 * one SGE */
443 send->s_wr.num_sge = 1;
444 sge = &send->s_sge[0];
445 } 508 }
446 509 return 0;
447 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
448 sge->length = sizeof(struct rds_header);
449 sge->lkey = ic->i_mr->lkey;
450} 510}
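
The decrementing counter means roughly one WR in rds_ib_sysctl_max_unsig_wrs (16 by default, per the sysctl further down in this patch) carries IB_SEND_SIGNALED, plus any WR the caller flags with notify. A toy program showing the resulting pattern; BATCH and the always-signal-the-last rule are illustrative stand-ins for the sysctl and for the explicit flagging the send paths do:

/* signal_batching.c — toy model of the completion batching policy. */
#include <stdio.h>

#define BATCH 16

int main(void)
{
        unsigned int unsignaled = BATCH;
        int n_wrs = 40, i;

        for (i = 0; i < n_wrs; i++) {
                /* Same test as above: signal when the budget hits zero,
                 * or when this WR must complete visibly (here: the last). */
                int signaled = (unsignaled-- == 0) || (i == n_wrs - 1);

                if (signaled)
                        unsignaled = BATCH;
                printf("wr %2d%s\n", i, signaled ? " SIGNALED" : "");
        }
        return 0;
}

Fewer signaled WRs means fewer completion events to poll, at the cost of reaping send-ring entries in larger, later batches.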
451 511
452/* 512/*
@@ -475,13 +535,14 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
475 u32 pos; 535 u32 pos;
476 u32 i; 536 u32 i;
477 u32 work_alloc; 537 u32 work_alloc;
478 u32 credit_alloc; 538 u32 credit_alloc = 0;
479 u32 posted; 539 u32 posted;
480 u32 adv_credits = 0; 540 u32 adv_credits = 0;
481 int send_flags = 0; 541 int send_flags = 0;
482 int sent; 542 int bytes_sent = 0;
483 int ret; 543 int ret;
484 int flow_controlled = 0; 544 int flow_controlled = 0;
545 int nr_sig = 0;
485 546
486 BUG_ON(off % RDS_FRAG_SIZE); 547 BUG_ON(off % RDS_FRAG_SIZE);
487 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); 548 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
@@ -507,14 +568,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
507 goto out; 568 goto out;
508 } 569 }
509 570
510 credit_alloc = work_alloc;
511 if (ic->i_flowctl) { 571 if (ic->i_flowctl) {
512 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); 572 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
513 adv_credits += posted; 573 adv_credits += posted;
514 if (credit_alloc < work_alloc) { 574 if (credit_alloc < work_alloc) {
515 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); 575 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
516 work_alloc = credit_alloc; 576 work_alloc = credit_alloc;
517 flow_controlled++; 577 flow_controlled = 1;
518 } 578 }
519 if (work_alloc == 0) { 579 if (work_alloc == 0) {
520 set_bit(RDS_LL_SEND_FULL, &conn->c_flags); 580 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
@@ -525,31 +585,25 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
525 } 585 }
526 586
527 /* map the message the first time we see it */ 587 /* map the message the first time we see it */
528 if (ic->i_rm == NULL) { 588 if (!ic->i_data_op) {
529 /* 589 if (rm->data.op_nents) {
530 printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n", 590 rm->data.op_count = ib_dma_map_sg(dev,
531 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 591 rm->data.op_sg,
532 rm->m_inc.i_hdr.h_flags, 592 rm->data.op_nents,
533 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 593 DMA_TO_DEVICE);
534 */ 594 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
535 if (rm->m_nents) { 595 if (rm->data.op_count == 0) {
536 rm->m_count = ib_dma_map_sg(dev,
537 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
538 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
539 if (rm->m_count == 0) {
540 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 596 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
541 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 597 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
542 ret = -ENOMEM; /* XXX ? */ 598 ret = -ENOMEM; /* XXX ? */
543 goto out; 599 goto out;
544 } 600 }
545 } else { 601 } else {
546 rm->m_count = 0; 602 rm->data.op_count = 0;
547 } 603 }
548 604
549 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
550 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
551 rds_message_addref(rm); 605 rds_message_addref(rm);
552 ic->i_rm = rm; 606 ic->i_data_op = &rm->data;
553 607
554 /* Finalize the header */ 608 /* Finalize the header */
555 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) 609 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
@@ -559,10 +613,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
559 613
560 /* If it has a RDMA op, tell the peer we did it. This is 614 /* If it has a RDMA op, tell the peer we did it. This is
561 * used by the peer to release use-once RDMA MRs. */ 615 * used by the peer to release use-once RDMA MRs. */
562 if (rm->m_rdma_op) { 616 if (rm->rdma.op_active) {
563 struct rds_ext_header_rdma ext_hdr; 617 struct rds_ext_header_rdma ext_hdr;
564 618
565 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 619 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
566 rds_message_add_extension(&rm->m_inc.i_hdr, 620 rds_message_add_extension(&rm->m_inc.i_hdr,
567 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 621 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
568 } 622 }
@@ -582,99 +636,77 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
582 /* 636 /*
583 * Update adv_credits since we reset the ACK_REQUIRED bit. 637 * Update adv_credits since we reset the ACK_REQUIRED bit.
584 */ 638 */
585 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); 639 if (ic->i_flowctl) {
586 adv_credits += posted; 640 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
587 BUG_ON(adv_credits > 255); 641 adv_credits += posted;
642 BUG_ON(adv_credits > 255);
643 }
588 } 644 }
589 645
590 send = &ic->i_sends[pos];
591 first = send;
592 prev = NULL;
593 scat = &rm->m_sg[sg];
594 sent = 0;
595 i = 0;
596
597 /* Sometimes you want to put a fence between an RDMA 646 /* Sometimes you want to put a fence between an RDMA
598 * READ and the following SEND. 647 * READ and the following SEND.
599 * We could either do this all the time 648 * We could either do this all the time
600 * or when requested by the user. Right now, we let 649 * or when requested by the user. Right now, we let
601 * the application choose. 650 * the application choose.
602 */ 651 */
603 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 652 if (rm->rdma.op_active && rm->rdma.op_fence)
604 send_flags = IB_SEND_FENCE; 653 send_flags = IB_SEND_FENCE;
605 654
606 /* 655 /* Each frag gets a header. Msgs may be 0 bytes */
607 * We could be copying the header into the unused tail of the page. 656 send = &ic->i_sends[pos];
608 * That would need to be changed in the future when those pages might 657 first = send;
609 * be mapped userspace pages or page cache pages. So instead we always 658 prev = NULL;
610 * use a second sge and our long-lived ring of mapped headers. We send 659 scat = &ic->i_data_op->op_sg[sg];
611 * the header after the data so that the data payload can be aligned on 660 i = 0;
612 * the receiver. 661 do {
613 */ 662 unsigned int len = 0;
614 663
615 /* handle a 0-len message */ 664 /* Set up the header */
616 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { 665 send->s_wr.send_flags = send_flags;
617 rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); 666 send->s_wr.opcode = IB_WR_SEND;
618 goto add_header; 667 send->s_wr.num_sge = 1;
619 } 668 send->s_wr.next = NULL;
669 send->s_queued = jiffies;
670 send->s_op = NULL;
620 671
621 /* if there's data reference it with a chain of work reqs */ 672 send->s_sge[0].addr = ic->i_send_hdrs_dma
622 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 673 + (pos * sizeof(struct rds_header));
623 unsigned int len; 674 send->s_sge[0].length = sizeof(struct rds_header);
624 675
625 send = &ic->i_sends[pos]; 676 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
626 677
627 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); 678 /* Set up the data, if present */
628 rds_ib_xmit_populate_wr(ic, send, pos, 679 if (i < work_alloc
629 ib_sg_dma_address(dev, scat) + off, len, 680 && scat != &rm->data.op_sg[rm->data.op_count]) {
630 send_flags); 681 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
682 send->s_wr.num_sge = 2;
631 683
632 /* 684 send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off;
633 * We want to delay signaling completions just enough to get 685 send->s_sge[1].length = len;
634 * the batching benefits but not so much that we create dead time
635 * on the wire.
636 */
637 if (ic->i_unsignaled_wrs-- == 0) {
638 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
639 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
640 }
641 686
642 ic->i_unsignaled_bytes -= len; 687 bytes_sent += len;
643 if (ic->i_unsignaled_bytes <= 0) { 688 off += len;
644 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; 689 if (off == ib_sg_dma_len(dev, scat)) {
645 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 690 scat++;
691 off = 0;
692 }
646 } 693 }
647 694
695 rds_ib_set_wr_signal_state(ic, send, 0);
696
648 /* 697 /*
649 * Always signal the last one if we're stopping due to flow control. 698 * Always signal the last one if we're stopping due to flow control.
650 */ 699 */
651 if (flow_controlled && i == (work_alloc-1)) 700 if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
652 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 701 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
653 702
703 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
704 nr_sig++;
705
654 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 706 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
655 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 707 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
656 708
657 sent += len; 709 if (ic->i_flowctl && adv_credits) {
658 off += len;
659 if (off == ib_sg_dma_len(dev, scat)) {
660 scat++;
661 off = 0;
662 }
663
664add_header:
665 /* Tack on the header after the data. The header SGE should already
666 * have been set up to point to the right header buffer. */
667 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
668
669 if (0) {
670 struct rds_header *hdr = &ic->i_send_hdrs[pos];
671
672 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
673 be16_to_cpu(hdr->h_dport),
674 hdr->h_flags,
675 be32_to_cpu(hdr->h_len));
676 }
677 if (adv_credits) {
678 struct rds_header *hdr = &ic->i_send_hdrs[pos]; 710 struct rds_header *hdr = &ic->i_send_hdrs[pos];
679 711
680 /* add credit and redo the header checksum */ 712 /* add credit and redo the header checksum */
@@ -689,20 +721,25 @@ add_header:
689 prev = send; 721 prev = send;
690 722
691 pos = (pos + 1) % ic->i_send_ring.w_nr; 723 pos = (pos + 1) % ic->i_send_ring.w_nr;
692 } 724 send = &ic->i_sends[pos];
725 i++;
726
727 } while (i < work_alloc
728 && scat != &rm->data.op_sg[rm->data.op_count]);
693 729
694 /* Account the RDS header in the number of bytes we sent, but just once. 730 /* Account the RDS header in the number of bytes we sent, but just once.
695 * The caller has no concept of fragmentation. */ 731 * The caller has no concept of fragmentation. */
696 if (hdr_off == 0) 732 if (hdr_off == 0)
697 sent += sizeof(struct rds_header); 733 bytes_sent += sizeof(struct rds_header);
698 734
699 /* if we finished the message then send completion owns it */ 735 /* if we finished the message then send completion owns it */
700 if (scat == &rm->m_sg[rm->m_count]) { 736 if (scat == &rm->data.op_sg[rm->data.op_count]) {
701 prev->s_rm = ic->i_rm; 737 prev->s_op = ic->i_data_op;
702 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 738 prev->s_wr.send_flags |= IB_SEND_SOLICITED;
703 ic->i_rm = NULL; 739 ic->i_data_op = NULL;
704 } 740 }
705 741
742 /* Put back wrs & credits we didn't use */
706 if (i < work_alloc) { 743 if (i < work_alloc) {
707 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 744 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
708 work_alloc = i; 745 work_alloc = i;
@@ -710,6 +747,9 @@ add_header:
710 if (ic->i_flowctl && i < credit_alloc) 747 if (ic->i_flowctl && i < credit_alloc)
711 rds_ib_send_add_credits(conn, credit_alloc - i); 748 rds_ib_send_add_credits(conn, credit_alloc - i);
712 749
750 if (nr_sig)
751 atomic_add(nr_sig, &ic->i_signaled_sends);
752
713 /* XXX need to worry about failed_wr and partial sends. */ 753 /* XXX need to worry about failed_wr and partial sends. */
714 failed_wr = &first->s_wr; 754 failed_wr = &first->s_wr;
715 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 755 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
@@ -720,32 +760,127 @@ add_header:
720 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " 760 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
721 "returned %d\n", &conn->c_faddr, ret); 761 "returned %d\n", &conn->c_faddr, ret);
722 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 762 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
723 if (prev->s_rm) { 763 rds_ib_sub_signaled(ic, nr_sig);
724 ic->i_rm = prev->s_rm; 764 if (prev->s_op) {
725 prev->s_rm = NULL; 765 ic->i_data_op = prev->s_op;
766 prev->s_op = NULL;
726 } 767 }
727 768
728 rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); 769 rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
729 goto out; 770 goto out;
730 } 771 }
731 772
732 ret = sent; 773 ret = bytes_sent;
733out: 774out:
734 BUG_ON(adv_credits); 775 BUG_ON(adv_credits);
735 return ret; 776 return ret;
736} 777}
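
Each pass through the do/while above emits one SEND WR: the header always rides in s_sge[0], and at most RDS_FRAG_SIZE bytes of payload ride in s_sge[1]. Quick arithmetic on what that costs for a large message, assuming RDS_FRAG_SIZE is 4 KB in this era of the code:

/* frag_math.c — WR count for a fragmented RDS message. */
#include <stdio.h>

#define RDS_FRAG_SIZE 4096      /* assumed: 1 << RDS_FRAG_SHIFT */

int main(void)
{
        unsigned long len = 1024 * 1024;        /* 1 MB payload */
        unsigned long wrs = (len + RDS_FRAG_SIZE - 1) / RDS_FRAG_SIZE;

        /* Each WR carries one header SGE plus at most one data SGE. */
        printf("%lu bytes -> %lu SEND WRs\n", len, wrs);        /* 256 */
        return 0;
}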
737 778
738int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 779/*
780 * Issue atomic operation.
 781 * A simplified version of the rdma case: we always map a single SG of
 782 * just 8 bytes, which holds the return value from the atomic operation.
783 */
784int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
785{
786 struct rds_ib_connection *ic = conn->c_transport_data;
787 struct rds_ib_send_work *send = NULL;
788 struct ib_send_wr *failed_wr;
789 struct rds_ib_device *rds_ibdev;
790 u32 pos;
791 u32 work_alloc;
792 int ret;
793 int nr_sig = 0;
794
795 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
796
797 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
798 if (work_alloc != 1) {
799 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
800 rds_ib_stats_inc(s_ib_tx_ring_full);
801 ret = -ENOMEM;
802 goto out;
803 }
804
805 /* address of send request in ring */
806 send = &ic->i_sends[pos];
807 send->s_queued = jiffies;
808
809 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
810 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
811 send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
812 send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
813 send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
814 send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
815 } else { /* FADD */
816 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
817 send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
818 send->s_wr.wr.atomic.swap = 0;
819 send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
820 send->s_wr.wr.atomic.swap_mask = 0;
821 }
822 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
823 send->s_wr.num_sge = 1;
824 send->s_wr.next = NULL;
825 send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
826 send->s_wr.wr.atomic.rkey = op->op_rkey;
827 send->s_op = op;
828 rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
829
830 /* map 8 byte retval buffer to the device */
831 ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
832 rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
833 if (ret != 1) {
834 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
835 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
836 ret = -ENOMEM; /* XXX ? */
837 goto out;
838 }
839
840 /* Convert our struct scatterlist to struct ib_sge */
841 send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
842 send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
843 send->s_sge[0].lkey = ic->i_mr->lkey;
844
845 rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
846 send->s_sge[0].addr, send->s_sge[0].length);
847
848 if (nr_sig)
849 atomic_add(nr_sig, &ic->i_signaled_sends);
850
851 failed_wr = &send->s_wr;
852 ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
853 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
854 send, &send->s_wr, ret, failed_wr);
855 BUG_ON(failed_wr != &send->s_wr);
856 if (ret) {
857 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
858 "returned %d\n", &conn->c_faddr, ret);
859 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
860 rds_ib_sub_signaled(ic, nr_sig);
861 goto out;
862 }
863
864 if (unlikely(failed_wr != &send->s_wr)) {
865 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
866 BUG_ON(failed_wr != &send->s_wr);
867 }
868
869out:
870 return ret;
871}
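
For reference, a userspace model of the masked compare-and-swap the WR fields above map to. This is my reading of the IB masked-atomics extension, so treat the semantics as an assumption rather than a spec quote; the FADD variant uses compare_add as the addend with, per the field names, nocarry_mask stopping carry propagation at mask boundaries, a plain fetch-and-add being the degenerate all-zero mask.

/* masked_atomic.c — assumed semantics of masked CSWP. */
#include <stdint.h>
#include <stdio.h>

static uint64_t masked_cswp(uint64_t *addr, uint64_t compare, uint64_t swap,
                            uint64_t compare_mask, uint64_t swap_mask)
{
        uint64_t old = *addr;

        /* Compare only under compare_mask; swap only the swap_mask bits. */
        if ((old & compare_mask) == (compare & compare_mask))
                *addr = (old & ~swap_mask) | (swap & swap_mask);
        return old;
}

int main(void)
{
        uint64_t val = 0xffff0000;

        /* Touch only the low 16 bits: the high half is ignored by the
         * compare and preserved by the swap. */
        masked_cswp(&val, 0x0, 0x1234, 0xffff, 0xffff);
        printf("val = 0x%llx\n", (unsigned long long)val);      /* 0xffff1234 */
        return 0;
}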
872
873int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
739{ 874{
740 struct rds_ib_connection *ic = conn->c_transport_data; 875 struct rds_ib_connection *ic = conn->c_transport_data;
741 struct rds_ib_send_work *send = NULL; 876 struct rds_ib_send_work *send = NULL;
742 struct rds_ib_send_work *first; 877 struct rds_ib_send_work *first;
743 struct rds_ib_send_work *prev; 878 struct rds_ib_send_work *prev;
744 struct ib_send_wr *failed_wr; 879 struct ib_send_wr *failed_wr;
745 struct rds_ib_device *rds_ibdev;
746 struct scatterlist *scat; 880 struct scatterlist *scat;
747 unsigned long len; 881 unsigned long len;
748 u64 remote_addr = op->r_remote_addr; 882 u64 remote_addr = op->op_remote_addr;
883 u32 max_sge = ic->rds_ibdev->max_sge;
749 u32 pos; 884 u32 pos;
750 u32 work_alloc; 885 u32 work_alloc;
751 u32 i; 886 u32 i;
@@ -753,29 +888,28 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
753 int sent; 888 int sent;
754 int ret; 889 int ret;
755 int num_sge; 890 int num_sge;
756 891 int nr_sig = 0;
757 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 892
758 893 /* map the op the first time we see it */
759 /* map the message the first time we see it */ 894 if (!op->op_mapped) {
760 if (!op->r_mapped) { 895 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
761 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 896 op->op_sg, op->op_nents, (op->op_write) ?
762 op->r_sg, op->r_nents, (op->r_write) ? 897 DMA_TO_DEVICE : DMA_FROM_DEVICE);
763 DMA_TO_DEVICE : DMA_FROM_DEVICE); 898 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
764 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 899 if (op->op_count == 0) {
765 if (op->r_count == 0) {
766 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 900 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
767 ret = -ENOMEM; /* XXX ? */ 901 ret = -ENOMEM; /* XXX ? */
768 goto out; 902 goto out;
769 } 903 }
770 904
771 op->r_mapped = 1; 905 op->op_mapped = 1;
772 } 906 }
773 907
774 /* 908 /*
775 * Instead of knowing how to return a partial rdma read/write we insist that there 909 * Instead of knowing how to return a partial rdma read/write we insist that there
776 * be enough work requests to send the entire message. 910 * be enough work requests to send the entire message.
777 */ 911 */
778 i = ceil(op->r_count, rds_ibdev->max_sge); 912 i = ceil(op->op_count, max_sge);
779 913
780 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); 914 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
781 if (work_alloc != i) { 915 if (work_alloc != i) {
@@ -788,30 +922,24 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
788 send = &ic->i_sends[pos]; 922 send = &ic->i_sends[pos];
789 first = send; 923 first = send;
790 prev = NULL; 924 prev = NULL;
791 scat = &op->r_sg[0]; 925 scat = &op->op_sg[0];
792 sent = 0; 926 sent = 0;
793 num_sge = op->r_count; 927 num_sge = op->op_count;
794 928
795 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 929 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
796 send->s_wr.send_flags = 0; 930 send->s_wr.send_flags = 0;
797 send->s_queued = jiffies; 931 send->s_queued = jiffies;
798 /* 932 send->s_op = NULL;
799 * We want to delay signaling completions just enough to get 933
800 * the batching benefits but not so much that we create dead time on the wire. 934 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
801 */
802 if (ic->i_unsignaled_wrs-- == 0) {
803 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
804 send->s_wr.send_flags = IB_SEND_SIGNALED;
805 }
806 935
807 send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 936 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
808 send->s_wr.wr.rdma.remote_addr = remote_addr; 937 send->s_wr.wr.rdma.remote_addr = remote_addr;
809 send->s_wr.wr.rdma.rkey = op->r_key; 938 send->s_wr.wr.rdma.rkey = op->op_rkey;
810 send->s_op = op;
811 939
812 if (num_sge > rds_ibdev->max_sge) { 940 if (num_sge > max_sge) {
813 send->s_wr.num_sge = rds_ibdev->max_sge; 941 send->s_wr.num_sge = max_sge;
814 num_sge -= rds_ibdev->max_sge; 942 num_sge -= max_sge;
815 } else { 943 } else {
816 send->s_wr.num_sge = num_sge; 944 send->s_wr.num_sge = num_sge;
817 } 945 }
@@ -821,7 +949,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
821 if (prev) 949 if (prev)
822 prev->s_wr.next = &send->s_wr; 950 prev->s_wr.next = &send->s_wr;
823 951
824 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 952 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
825 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 953 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
826 send->s_sge[j].addr = 954 send->s_sge[j].addr =
827 ib_sg_dma_address(ic->i_cm_id->device, scat); 955 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -843,15 +971,20 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
843 send = ic->i_sends; 971 send = ic->i_sends;
844 } 972 }
845 973
846 /* if we finished the message then send completion owns it */ 974 /* give a reference to the last op */
847 if (scat == &op->r_sg[op->r_count]) 975 if (scat == &op->op_sg[op->op_count]) {
848 prev->s_wr.send_flags = IB_SEND_SIGNALED; 976 prev->s_op = op;
977 rds_message_addref(container_of(op, struct rds_message, rdma));
978 }
849 979
850 if (i < work_alloc) { 980 if (i < work_alloc) {
851 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 981 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
852 work_alloc = i; 982 work_alloc = i;
853 } 983 }
854 984
985 if (nr_sig)
986 atomic_add(nr_sig, &ic->i_signaled_sends);
987
855 failed_wr = &first->s_wr; 988 failed_wr = &first->s_wr;
856 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 989 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
857 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 990 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
@@ -861,6 +994,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
861 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " 994 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
862 "returned %d\n", &conn->c_faddr, ret); 995 "returned %d\n", &conn->c_faddr, ret);
863 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 996 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
997 rds_ib_sub_signaled(ic, nr_sig);
864 goto out; 998 goto out;
865 } 999 }
866 1000
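The WR budget in this function is ceil(op_count, max_sge): the device caps SGEs per work request, so a long mapped SG list is spread across a chain of RDMA WRs, each pointed at the running remote_addr. Assuming rds.h's ceil() is the usual round-up division, the arithmetic is just:

/* sge_split.c — how many WRs an RDMA op needs. */
#include <stdio.h>

#define ceil(x, y)      (((x) + (y) - 1) / (y))

int main(void)
{
        unsigned int op_count = 100;    /* mapped SG entries (hypothetical) */
        unsigned int max_sge  = 30;     /* device limit (hypothetical) */

        printf("%u SGEs at %u per WR -> %u WRs\n",
               op_count, max_sge, ceil(op_count, max_sge));
        return 0;       /* prints 4: three full WRs plus one with 10 SGEs */
}
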
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d2c904dd6fbc..2d5965d6e97c 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -67,6 +67,8 @@ static const char *const rds_ib_stat_names[] = {
67 "ib_rdma_mr_pool_flush", 67 "ib_rdma_mr_pool_flush",
68 "ib_rdma_mr_pool_wait", 68 "ib_rdma_mr_pool_wait",
69 "ib_rdma_mr_pool_depleted", 69 "ib_rdma_mr_pool_depleted",
70 "ib_atomic_cswp",
71 "ib_atomic_fadd",
70}; 72};
71 73
72unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, 74unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index 03f01cb4e0fe..1253b006efdb 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -49,10 +49,6 @@ unsigned long rds_ib_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1; 49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; 50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
51 51
52unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
55
56/* 52/*
57 * This sysctl does nothing. 53 * This sysctl does nothing.
58 * 54 *
@@ -65,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
65 */ 61 */
66unsigned int rds_ib_sysctl_flow_control = 0; 62unsigned int rds_ib_sysctl_flow_control = 0;
67 63
68ctl_table rds_ib_sysctl_table[] = { 64static ctl_table rds_ib_sysctl_table[] = {
69 { 65 {
70 .procname = "max_send_wr", 66 .procname = "max_send_wr",
71 .data = &rds_ib_sysctl_max_send_wr, 67 .data = &rds_ib_sysctl_max_send_wr,
@@ -94,15 +90,6 @@ ctl_table rds_ib_sysctl_table[] = {
94 .extra2 = &rds_ib_sysctl_max_unsig_wr_max, 90 .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
95 }, 91 },
96 { 92 {
97 .procname = "max_unsignaled_bytes",
98 .data = &rds_ib_sysctl_max_unsig_bytes,
99 .maxlen = sizeof(unsigned long),
100 .mode = 0644,
101 .proc_handler = proc_doulongvec_minmax,
102 .extra1 = &rds_ib_sysctl_max_unsig_bytes_min,
103 .extra2 = &rds_ib_sysctl_max_unsig_bytes_max,
104 },
105 {
106 .procname = "max_recv_allocation", 93 .procname = "max_recv_allocation",
107 .data = &rds_ib_sysctl_max_recv_allocation, 94 .data = &rds_ib_sysctl_max_recv_allocation,
108 .maxlen = sizeof(unsigned long), 95 .maxlen = sizeof(unsigned long),
@@ -132,10 +119,10 @@ void rds_ib_sysctl_exit(void)
132 unregister_sysctl_table(rds_ib_sysctl_hdr); 119 unregister_sysctl_table(rds_ib_sysctl_hdr);
133} 120}
134 121
135int __init rds_ib_sysctl_init(void) 122int rds_ib_sysctl_init(void)
136{ 123{
137 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); 124 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
138 if (rds_ib_sysctl_hdr == NULL) 125 if (!rds_ib_sysctl_hdr)
139 return -ENOMEM; 126 return -ENOMEM;
140 return 0; 127 return 0;
141} 128}
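
Besides dropping max_unsignaled_bytes, the hunks above make the table static and switch the NULL checks to the shorter idiom. For context, the whole registration pattern in this kernel era looks like the sketch below; the path, table contents, and values are hypothetical, not the RDS ones:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/sysctl.h>

static unsigned long example_val = 16;
static unsigned long example_min = 1;
static unsigned long example_max = 64;

/* /proc/sys/net/example/max_things */
static struct ctl_path example_path[] = {
        { .procname = "net" },
        { .procname = "example" },
        { }
};

static ctl_table example_table[] = {
        {
                .procname       = "max_things",
                .data           = &example_val,
                .maxlen         = sizeof(unsigned long),
                .mode           = 0644,
                .proc_handler   = proc_doulongvec_minmax,
                .extra1         = &example_min,
                .extra2         = &example_max,
        },
        { }
};

static struct ctl_table_header *example_hdr;

static int __init example_sysctl_init(void)
{
        example_hdr = register_sysctl_paths(example_path, example_table);
        return example_hdr ? 0 : -ENOMEM;
}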
diff --git a/net/rds/info.c b/net/rds/info.c
index c45c4173a44d..4fdf1b6e84ff 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -76,7 +76,7 @@ void rds_info_register_func(int optname, rds_info_func func)
76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
77 77
78 spin_lock(&rds_info_lock); 78 spin_lock(&rds_info_lock);
79 BUG_ON(rds_info_funcs[offset] != NULL); 79 BUG_ON(rds_info_funcs[offset]);
80 rds_info_funcs[offset] = func; 80 rds_info_funcs[offset] = func;
81 spin_unlock(&rds_info_lock); 81 spin_unlock(&rds_info_lock);
82} 82}
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(rds_info_deregister_func);
102 */ 102 */
103void rds_info_iter_unmap(struct rds_info_iterator *iter) 103void rds_info_iter_unmap(struct rds_info_iterator *iter)
104{ 104{
105 if (iter->addr != NULL) { 105 if (iter->addr) {
106 kunmap_atomic(iter->addr, KM_USER0); 106 kunmap_atomic(iter->addr, KM_USER0);
107 iter->addr = NULL; 107 iter->addr = NULL;
108 } 108 }
@@ -117,7 +117,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
117 unsigned long this; 117 unsigned long this;
118 118
119 while (bytes) { 119 while (bytes) {
120 if (iter->addr == NULL) 120 if (!iter->addr)
121 iter->addr = kmap_atomic(*iter->pages, KM_USER0); 121 iter->addr = kmap_atomic(*iter->pages, KM_USER0);
122 122
123 this = min(bytes, PAGE_SIZE - iter->offset); 123 this = min(bytes, PAGE_SIZE - iter->offset);
@@ -188,7 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
188 >> PAGE_SHIFT; 188 >> PAGE_SHIFT;
189 189
190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); 190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
191 if (pages == NULL) { 191 if (!pages) {
192 ret = -ENOMEM; 192 ret = -ENOMEM;
193 goto out; 193 goto out;
194 } 194 }
@@ -206,7 +206,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
206 206
207call_func: 207call_func:
208 func = rds_info_funcs[optname - RDS_INFO_FIRST]; 208 func = rds_info_funcs[optname - RDS_INFO_FIRST];
209 if (func == NULL) { 209 if (!func) {
210 ret = -ENOPROTOOPT; 210 ret = -ENOPROTOOPT;
211 goto out; 211 goto out;
212 } 212 }
@@ -234,7 +234,7 @@ call_func:
234 ret = -EFAULT; 234 ret = -EFAULT;
235 235
236out: 236out:
237 for (i = 0; pages != NULL && i < nr_pages; i++) 237 for (i = 0; pages && i < nr_pages; i++)
238 put_page(pages[i]); 238 put_page(pages[i]);
239 kfree(pages); 239 kfree(pages);
240 240
diff --git a/net/rds/iw.c b/net/rds/iw.c
index c8f3d3525cb9..5a9676fe594f 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -56,7 +56,7 @@ struct list_head rds_iw_devices;
56DEFINE_SPINLOCK(iw_nodev_conns_lock); 56DEFINE_SPINLOCK(iw_nodev_conns_lock);
57LIST_HEAD(iw_nodev_conns); 57LIST_HEAD(iw_nodev_conns);
58 58
59void rds_iw_add_one(struct ib_device *device) 59static void rds_iw_add_one(struct ib_device *device)
60{ 60{
61 struct rds_iw_device *rds_iwdev; 61 struct rds_iw_device *rds_iwdev;
62 struct ib_device_attr *dev_attr; 62 struct ib_device_attr *dev_attr;
@@ -124,7 +124,7 @@ free_attr:
124 kfree(dev_attr); 124 kfree(dev_attr);
125} 125}
126 126
127void rds_iw_remove_one(struct ib_device *device) 127static void rds_iw_remove_one(struct ib_device *device)
128{ 128{
129 struct rds_iw_device *rds_iwdev; 129 struct rds_iw_device *rds_iwdev;
130 struct rds_iw_cm_id *i_cm_id, *next; 130 struct rds_iw_cm_id *i_cm_id, *next;
@@ -264,7 +264,6 @@ struct rds_transport rds_iw_transport = {
264 .laddr_check = rds_iw_laddr_check, 264 .laddr_check = rds_iw_laddr_check,
265 .xmit_complete = rds_iw_xmit_complete, 265 .xmit_complete = rds_iw_xmit_complete,
266 .xmit = rds_iw_xmit, 266 .xmit = rds_iw_xmit,
267 .xmit_cong_map = NULL,
268 .xmit_rdma = rds_iw_xmit_rdma, 267 .xmit_rdma = rds_iw_xmit_rdma,
269 .recv = rds_iw_recv, 268 .recv = rds_iw_recv,
270 .conn_alloc = rds_iw_conn_alloc, 269 .conn_alloc = rds_iw_conn_alloc,
@@ -272,7 +271,6 @@ struct rds_transport rds_iw_transport = {
272 .conn_connect = rds_iw_conn_connect, 271 .conn_connect = rds_iw_conn_connect,
273 .conn_shutdown = rds_iw_conn_shutdown, 272 .conn_shutdown = rds_iw_conn_shutdown,
274 .inc_copy_to_user = rds_iw_inc_copy_to_user, 273 .inc_copy_to_user = rds_iw_inc_copy_to_user,
275 .inc_purge = rds_iw_inc_purge,
276 .inc_free = rds_iw_inc_free, 274 .inc_free = rds_iw_inc_free,
277 .cm_initiate_connect = rds_iw_cm_initiate_connect, 275 .cm_initiate_connect = rds_iw_cm_initiate_connect,
278 .cm_handle_connect = rds_iw_cm_handle_connect, 276 .cm_handle_connect = rds_iw_cm_handle_connect,
@@ -289,7 +287,7 @@ struct rds_transport rds_iw_transport = {
289 .t_prefer_loopback = 1, 287 .t_prefer_loopback = 1,
290}; 288};
291 289
292int __init rds_iw_init(void) 290int rds_iw_init(void)
293{ 291{
294 int ret; 292 int ret;
295 293
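
Dropping the explicit .xmit_cong_map = NULL entry from rds_iw_transport above is safe because members omitted from a designated initializer are zero-initialized, so an ops struct only needs entries for the callbacks it actually implements. Standalone sketch (hypothetical struct, not the RDS one):

#include <stdio.h>

struct ops {
	void (*foo)(void);
	void (*bar)(void);	/* never set below */
};

static void do_foo(void) { puts("foo"); }

static const struct ops my_ops = {
	.foo = do_foo,
	/* .bar is implicitly NULL, per C99 6.7.8 */
};

int main(void)
{
	my_ops.foo();
	if (!my_ops.bar)
		puts("bar is NULL, as the language guarantees");
	return 0;
}
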
diff --git a/net/rds/iw.h b/net/rds/iw.h
index eef2f0c28476..90151922178c 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -70,7 +70,7 @@ struct rds_iw_send_work {
70 struct rds_message *s_rm; 70 struct rds_message *s_rm;
71 71
72 /* We should really put these into a union: */ 72 /* We should really put these into a union: */
73 struct rds_rdma_op *s_op; 73 struct rm_rdma_op *s_op;
74 struct rds_iw_mapping *s_mapping; 74 struct rds_iw_mapping *s_mapping;
75 struct ib_mr *s_mr; 75 struct ib_mr *s_mr;
76 struct ib_fast_reg_page_list *s_page_list; 76 struct ib_fast_reg_page_list *s_page_list;
@@ -268,8 +268,6 @@ static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic)
268 268
269/* ib.c */ 269/* ib.c */
270extern struct rds_transport rds_iw_transport; 270extern struct rds_transport rds_iw_transport;
271extern void rds_iw_add_one(struct ib_device *device);
272extern void rds_iw_remove_one(struct ib_device *device);
273extern struct ib_client rds_iw_client; 271extern struct ib_client rds_iw_client;
274 272
275extern unsigned int fastreg_pool_size; 273extern unsigned int fastreg_pool_size;
@@ -284,7 +282,7 @@ void rds_iw_conn_free(void *arg);
284int rds_iw_conn_connect(struct rds_connection *conn); 282int rds_iw_conn_connect(struct rds_connection *conn);
285void rds_iw_conn_shutdown(struct rds_connection *conn); 283void rds_iw_conn_shutdown(struct rds_connection *conn);
286void rds_iw_state_change(struct sock *sk); 284void rds_iw_state_change(struct sock *sk);
287int __init rds_iw_listen_init(void); 285int rds_iw_listen_init(void);
288void rds_iw_listen_stop(void); 286void rds_iw_listen_stop(void);
289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...); 287void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, 288int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -318,15 +316,13 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
318void rds_iw_sync_mr(void *trans_private, int dir); 316void rds_iw_sync_mr(void *trans_private, int dir);
319void rds_iw_free_mr(void *trans_private, int invalidate); 317void rds_iw_free_mr(void *trans_private, int invalidate);
320void rds_iw_flush_mrs(void); 318void rds_iw_flush_mrs(void);
321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
322 319
323/* ib_recv.c */ 320/* ib_recv.c */
324int __init rds_iw_recv_init(void); 321int rds_iw_recv_init(void);
325void rds_iw_recv_exit(void); 322void rds_iw_recv_exit(void);
326int rds_iw_recv(struct rds_connection *conn); 323int rds_iw_recv(struct rds_connection *conn);
327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 324int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
328 gfp_t page_gfp, int prefill); 325 gfp_t page_gfp, int prefill);
329void rds_iw_inc_purge(struct rds_incoming *inc);
330void rds_iw_inc_free(struct rds_incoming *inc); 326void rds_iw_inc_free(struct rds_incoming *inc);
331int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 327int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
332 size_t size); 328 size_t size);
@@ -358,7 +354,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
358void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context); 354void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
359void rds_iw_send_init_ring(struct rds_iw_connection *ic); 355void rds_iw_send_init_ring(struct rds_iw_connection *ic);
360void rds_iw_send_clear_ring(struct rds_iw_connection *ic); 356void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
361int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 357int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
362void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); 358void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
363void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); 359void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
364int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, 360int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
@@ -371,7 +367,7 @@ unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
371 unsigned int avail); 367 unsigned int avail);
372 368
373/* ib_sysctl.c */ 369/* ib_sysctl.c */
374int __init rds_iw_sysctl_init(void); 370int rds_iw_sysctl_init(void);
375void rds_iw_sysctl_exit(void); 371void rds_iw_sysctl_exit(void);
376extern unsigned long rds_iw_sysctl_max_send_wr; 372extern unsigned long rds_iw_sysctl_max_send_wr;
377extern unsigned long rds_iw_sysctl_max_recv_wr; 373extern unsigned long rds_iw_sysctl_max_recv_wr;
@@ -379,7 +375,6 @@ extern unsigned long rds_iw_sysctl_max_unsig_wrs;
379extern unsigned long rds_iw_sysctl_max_unsig_bytes; 375extern unsigned long rds_iw_sysctl_max_unsig_bytes;
380extern unsigned long rds_iw_sysctl_max_recv_allocation; 376extern unsigned long rds_iw_sysctl_max_recv_allocation;
381extern unsigned int rds_iw_sysctl_flow_control; 377extern unsigned int rds_iw_sysctl_flow_control;
382extern ctl_table rds_iw_sysctl_table[];
383 378
384/* 379/*
385 * Helper functions for getting/setting the header and data SGEs in 380 * Helper functions for getting/setting the header and data SGEs in
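
The __init markers removed from these prototypes matter because code placed in .init.text is discarded once initialization completes; a function that can be called later, presumably because RDS transports can now be brought up on demand after boot, must live in regular text. A sketch of the distinction (hypothetical module, not from the patch):

#include <linux/init.h>
#include <linux/module.h>

static int __init boot_only_setup(void)	/* memory reclaimed after init */
{
	return 0;
}

static int on_demand_setup(void)	/* must stay resident */
{
	return 0;
}

static int __init demo_init(void)
{
	int ret = boot_only_setup();

	return ret ? ret : on_demand_setup();
}
module_init(demo_init);
MODULE_LICENSE("GPL");
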
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index b5dd6ac39be8..712cf2d1f28e 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -257,7 +257,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
257 * the rds_iwdev at all. 257 * the rds_iwdev at all.
258 */ 258 */
259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client); 259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
260 if (rds_iwdev == NULL) { 260 if (!rds_iwdev) {
261 if (printk_ratelimit()) 261 if (printk_ratelimit())
262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", 262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
263 dev->name); 263 dev->name);
@@ -292,7 +292,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
292 ic->i_send_ring.w_nr * 292 ic->i_send_ring.w_nr *
293 sizeof(struct rds_header), 293 sizeof(struct rds_header),
294 &ic->i_send_hdrs_dma, GFP_KERNEL); 294 &ic->i_send_hdrs_dma, GFP_KERNEL);
295 if (ic->i_send_hdrs == NULL) { 295 if (!ic->i_send_hdrs) {
296 ret = -ENOMEM; 296 ret = -ENOMEM;
297 rdsdebug("ib_dma_alloc_coherent send failed\n"); 297 rdsdebug("ib_dma_alloc_coherent send failed\n");
298 goto out; 298 goto out;
@@ -302,7 +302,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
302 ic->i_recv_ring.w_nr * 302 ic->i_recv_ring.w_nr *
303 sizeof(struct rds_header), 303 sizeof(struct rds_header),
304 &ic->i_recv_hdrs_dma, GFP_KERNEL); 304 &ic->i_recv_hdrs_dma, GFP_KERNEL);
305 if (ic->i_recv_hdrs == NULL) { 305 if (!ic->i_recv_hdrs) {
306 ret = -ENOMEM; 306 ret = -ENOMEM;
307 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 307 rdsdebug("ib_dma_alloc_coherent recv failed\n");
308 goto out; 308 goto out;
@@ -310,14 +310,14 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
310 310
311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
312 &ic->i_ack_dma, GFP_KERNEL); 312 &ic->i_ack_dma, GFP_KERNEL);
313 if (ic->i_ack == NULL) { 313 if (!ic->i_ack) {
314 ret = -ENOMEM; 314 ret = -ENOMEM;
315 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 315 rdsdebug("ib_dma_alloc_coherent ack failed\n");
316 goto out; 316 goto out;
317 } 317 }
318 318
319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); 319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
320 if (ic->i_sends == NULL) { 320 if (!ic->i_sends) {
321 ret = -ENOMEM; 321 ret = -ENOMEM;
322 rdsdebug("send allocation failed\n"); 322 rdsdebug("send allocation failed\n");
323 goto out; 323 goto out;
@@ -325,7 +325,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
325 rds_iw_send_init_ring(ic); 325 rds_iw_send_init_ring(ic);
326 326
327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); 327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
328 if (ic->i_recvs == NULL) { 328 if (!ic->i_recvs) {
329 ret = -ENOMEM; 329 ret = -ENOMEM;
330 rdsdebug("recv allocation failed\n"); 330 rdsdebug("recv allocation failed\n");
331 goto out; 331 goto out;
@@ -696,7 +696,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
696 696
697 /* XXX too lazy? */ 697 /* XXX too lazy? */
698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); 698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
699 if (ic == NULL) 699 if (!ic)
700 return -ENOMEM; 700 return -ENOMEM;
701 701
702 INIT_LIST_HEAD(&ic->iw_node); 702 INIT_LIST_HEAD(&ic->iw_node);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 13dc1862d862..59509e9a9e72 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -34,7 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38#include "iw.h" 37#include "iw.h"
39 38
40 39
@@ -158,7 +157,8 @@ static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *
158 return 0; 157 return 0;
159} 158}
160 159
161void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) 160static void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev,
161 struct rdma_cm_id *cm_id)
162{ 162{
163 struct rds_iw_cm_id *i_cm_id; 163 struct rds_iw_cm_id *i_cm_id;
164 164
@@ -207,9 +207,9 @@ void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *con
207 BUG_ON(list_empty(&ic->iw_node)); 207 BUG_ON(list_empty(&ic->iw_node));
208 list_del(&ic->iw_node); 208 list_del(&ic->iw_node);
209 209
210 spin_lock_irq(&rds_iwdev->spinlock); 210 spin_lock(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 212 spin_unlock(&rds_iwdev->spinlock);
213 spin_unlock_irq(&iw_nodev_conns_lock); 213 spin_unlock_irq(&iw_nodev_conns_lock);
214 214
215 ic->rds_iwdev = rds_iwdev; 215 ic->rds_iwdev = rds_iwdev;
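
The spin_lock_irq to spin_lock change above fixes a classic nesting bug: rds_iwdev->spinlock is taken while iw_nodev_conns_lock already has interrupts disabled, so the inner spin_unlock_irq() would have re-enabled IRQs while the outer lock was still held. The rule, sketched with hypothetical locks:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(outer_lock);
static DEFINE_SPINLOCK(inner_lock);

static void move_between_lists(void)
{
	spin_lock_irq(&outer_lock);	/* IRQs off from here on */
	spin_lock(&inner_lock);		/* plain lock: IRQs already off */
	/* ... splice list entries ... */
	spin_unlock(&inner_lock);	/* IRQs must stay off */
	spin_unlock_irq(&outer_lock);	/* IRQs back on */
}
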
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 3d479067d54d..5e57347f49ff 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -53,7 +53,7 @@ static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
53static void rds_iw_frag_free(struct rds_page_frag *frag) 53static void rds_iw_frag_free(struct rds_page_frag *frag)
54{ 54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page); 55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL); 56 BUG_ON(frag->f_page);
57 kmem_cache_free(rds_iw_frag_slab, frag); 57 kmem_cache_free(rds_iw_frag_slab, frag);
58} 58}
59 59
@@ -143,14 +143,14 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
143 struct ib_sge *sge; 143 struct ib_sge *sge;
144 int ret = -ENOMEM; 144 int ret = -ENOMEM;
145 145
146 if (recv->r_iwinc == NULL) { 146 if (!recv->r_iwinc) {
147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) { 147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) {
148 rds_iw_stats_inc(s_iw_rx_alloc_limit); 148 rds_iw_stats_inc(s_iw_rx_alloc_limit);
149 goto out; 149 goto out;
150 } 150 }
151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, 151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
152 kptr_gfp); 152 kptr_gfp);
153 if (recv->r_iwinc == NULL) { 153 if (!recv->r_iwinc) {
154 atomic_dec(&rds_iw_allocation); 154 atomic_dec(&rds_iw_allocation);
155 goto out; 155 goto out;
156 } 156 }
@@ -158,17 +158,17 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); 158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
159 } 159 }
160 160
161 if (recv->r_frag == NULL) { 161 if (!recv->r_frag) {
162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp); 162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
163 if (recv->r_frag == NULL) 163 if (!recv->r_frag)
164 goto out; 164 goto out;
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 165 INIT_LIST_HEAD(&recv->r_frag->f_item);
166 recv->r_frag->f_page = NULL; 166 recv->r_frag->f_page = NULL;
167 } 167 }
168 168
169 if (ic->i_frag.f_page == NULL) { 169 if (!ic->i_frag.f_page) {
170 ic->i_frag.f_page = alloc_page(page_gfp); 170 ic->i_frag.f_page = alloc_page(page_gfp);
171 if (ic->i_frag.f_page == NULL) 171 if (!ic->i_frag.f_page)
172 goto out; 172 goto out;
173 ic->i_frag.f_offset = 0; 173 ic->i_frag.f_offset = 0;
174 } 174 }
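
rds_iw_recv_refill_one() above allocates lazily and caps the total with atomic_add_unless(): the counter is only bumped while it is below the limit, so concurrent refills cannot overshoot, and a failed allocation has to give the reservation back. A sketch of the pattern (hypothetical counter and limit, not from the patch):

#include <linux/atomic.h>
#include <linux/slab.h>

static atomic_t nr_allocated = ATOMIC_INIT(0);

static void *capped_alloc(size_t size, gfp_t gfp, int limit)
{
	void *p;

	if (!atomic_add_unless(&nr_allocated, 1, limit))
		return NULL;		/* at the cap; caller backs off */

	p = kmalloc(size, gfp);
	if (!p)
		atomic_dec(&nr_allocated);	/* undo the reservation */
	return p;
}
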
@@ -273,7 +273,7 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
273 return ret; 273 return ret;
274} 274}
275 275
276void rds_iw_inc_purge(struct rds_incoming *inc) 276static void rds_iw_inc_purge(struct rds_incoming *inc)
277{ 277{
278 struct rds_iw_incoming *iwinc; 278 struct rds_iw_incoming *iwinc;
279 struct rds_page_frag *frag; 279 struct rds_page_frag *frag;
@@ -716,7 +716,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
716 * into the inc and save the inc so we can hang upcoming fragments 716 * into the inc and save the inc so we can hang upcoming fragments
717 * off its list. 717 * off its list.
718 */ 718 */
719 if (iwinc == NULL) { 719 if (!iwinc) {
720 iwinc = recv->r_iwinc; 720 iwinc = recv->r_iwinc;
721 recv->r_iwinc = NULL; 721 recv->r_iwinc = NULL;
722 ic->i_iwinc = iwinc; 722 ic->i_iwinc = iwinc;
@@ -887,7 +887,7 @@ int rds_iw_recv(struct rds_connection *conn)
887 return ret; 887 return ret;
888} 888}
889 889
890int __init rds_iw_recv_init(void) 890int rds_iw_recv_init(void)
891{ 891{
892 struct sysinfo si; 892 struct sysinfo si;
893 int ret = -ENOMEM; 893 int ret = -ENOMEM;
@@ -899,13 +899,13 @@ int __init rds_iw_recv_init(void)
899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming", 899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
900 sizeof(struct rds_iw_incoming), 900 sizeof(struct rds_iw_incoming),
901 0, 0, NULL); 901 0, 0, NULL);
902 if (rds_iw_incoming_slab == NULL) 902 if (!rds_iw_incoming_slab)
903 goto out; 903 goto out;
904 904
905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag", 905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
906 sizeof(struct rds_page_frag), 906 sizeof(struct rds_page_frag),
907 0, 0, NULL); 907 0, 0, NULL);
908 if (rds_iw_frag_slab == NULL) 908 if (!rds_iw_frag_slab)
909 kmem_cache_destroy(rds_iw_incoming_slab); 909 kmem_cache_destroy(rds_iw_incoming_slab);
910 else 910 else
911 ret = 0; 911 ret = 0;
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 52182ff7519e..6280ea020d4e 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -36,7 +36,6 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "iw.h" 39#include "iw.h"
41 40
42static void rds_iw_send_rdma_complete(struct rds_message *rm, 41static void rds_iw_send_rdma_complete(struct rds_message *rm,
@@ -64,13 +63,13 @@ static void rds_iw_send_rdma_complete(struct rds_message *rm,
64} 63}
65 64
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic, 65static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rds_rdma_op *op) 66 struct rm_rdma_op *op)
68{ 67{
69 if (op->r_mapped) { 68 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 69 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 70 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 71 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 72 op->op_mapped = 0;
74 } 73 }
75} 74}
76 75
@@ -83,11 +82,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 82 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84 83
85 ib_dma_unmap_sg(ic->i_cm_id->device, 84 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents, 85 rm->data.op_sg, rm->data.op_nents,
87 DMA_TO_DEVICE); 86 DMA_TO_DEVICE);
88 87
89 if (rm->m_rdma_op != NULL) { 88 if (rm->rdma.op_active) {
90 rds_iw_send_unmap_rdma(ic, rm->m_rdma_op); 89 rds_iw_send_unmap_rdma(ic, &rm->rdma);
91 90
92 /* If the user asked for a completion notification on this 91 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics: 92 * message, we can implement three different semantics:
@@ -111,10 +110,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
111 */ 110 */
112 rds_iw_send_rdma_complete(rm, wc_status); 111 rds_iw_send_rdma_complete(rm, wc_status);
113 112
114 if (rm->m_rdma_op->r_write) 113 if (rm->rdma.op_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 114 rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
116 else 115 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 116 rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
118 } 117 }
119 118
120 /* If anyone waited for this message to get flushed out, wake 119 /* If anyone waited for this message to get flushed out, wake
@@ -556,25 +555,27 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
556 } 555 }
557 556
558 /* map the message the first time we see it */ 557 /* map the message the first time we see it */
559 if (ic->i_rm == NULL) { 558 if (!ic->i_rm) {
560 /* 559 /*
561 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n", 560 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
562 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 561 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
563 rm->m_inc.i_hdr.h_flags, 562 rm->m_inc.i_hdr.h_flags,
564 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 563 be32_to_cpu(rm->m_inc.i_hdr.h_len));
565 */ 564 */
566 if (rm->m_nents) { 565 if (rm->data.op_nents) {
567 rm->m_count = ib_dma_map_sg(dev, 566 rm->data.op_count = ib_dma_map_sg(dev,
568 rm->m_sg, rm->m_nents, DMA_TO_DEVICE); 567 rm->data.op_sg,
569 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); 568 rm->data.op_nents,
570 if (rm->m_count == 0) { 569 DMA_TO_DEVICE);
570 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
571 if (rm->data.op_count == 0) {
571 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 572 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
572 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); 573 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
573 ret = -ENOMEM; /* XXX ? */ 574 ret = -ENOMEM; /* XXX ? */
574 goto out; 575 goto out;
575 } 576 }
576 } else { 577 } else {
577 rm->m_count = 0; 578 rm->data.op_count = 0;
578 } 579 }
579 580
580 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 581 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
@@ -590,10 +591,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
590 591
591 /* If it has a RDMA op, tell the peer we did it. This is 592 /* If it has a RDMA op, tell the peer we did it. This is
592 * used by the peer to release use-once RDMA MRs. */ 593 * used by the peer to release use-once RDMA MRs. */
593 if (rm->m_rdma_op) { 594 if (rm->rdma.op_active) {
594 struct rds_ext_header_rdma ext_hdr; 595 struct rds_ext_header_rdma ext_hdr;
595 596
596 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 597 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
597 rds_message_add_extension(&rm->m_inc.i_hdr, 598 rds_message_add_extension(&rm->m_inc.i_hdr,
598 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 599 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
599 } 600 }
@@ -621,7 +622,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
621 send = &ic->i_sends[pos]; 622 send = &ic->i_sends[pos];
622 first = send; 623 first = send;
623 prev = NULL; 624 prev = NULL;
624 scat = &rm->m_sg[sg]; 625 scat = &rm->data.op_sg[sg];
625 sent = 0; 626 sent = 0;
626 i = 0; 627 i = 0;
627 628
@@ -631,7 +632,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
631 * or when requested by the user. Right now, we let 632 * or when requested by the user. Right now, we let
632 * the application choose. 633 * the application choose.
633 */ 634 */
634 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 635 if (rm->rdma.op_active && rm->rdma.op_fence)
635 send_flags = IB_SEND_FENCE; 636 send_flags = IB_SEND_FENCE;
636 637
637 /* 638 /*
@@ -650,7 +651,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
650 } 651 }
651 652
652 /* if there's data reference it with a chain of work reqs */ 653 /* if there's data reference it with a chain of work reqs */
653 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 654 for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
654 unsigned int len; 655 unsigned int len;
655 656
656 send = &ic->i_sends[pos]; 657 send = &ic->i_sends[pos];
@@ -728,7 +729,7 @@ add_header:
728 sent += sizeof(struct rds_header); 729 sent += sizeof(struct rds_header);
729 730
730 /* if we finished the message then send completion owns it */ 731 /* if we finished the message then send completion owns it */
731 if (scat == &rm->m_sg[rm->m_count]) { 732 if (scat == &rm->data.op_sg[rm->data.op_count]) {
732 prev->s_rm = ic->i_rm; 733 prev->s_rm = ic->i_rm;
733 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 734 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
734 ic->i_rm = NULL; 735 ic->i_rm = NULL;
@@ -784,7 +785,7 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd
784 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); 785 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
785} 786}
786 787
787int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 788int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
788{ 789{
789 struct rds_iw_connection *ic = conn->c_transport_data; 790 struct rds_iw_connection *ic = conn->c_transport_data;
790 struct rds_iw_send_work *send = NULL; 791 struct rds_iw_send_work *send = NULL;
@@ -794,7 +795,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
794 struct rds_iw_device *rds_iwdev; 795 struct rds_iw_device *rds_iwdev;
795 struct scatterlist *scat; 796 struct scatterlist *scat;
796 unsigned long len; 797 unsigned long len;
797 u64 remote_addr = op->r_remote_addr; 798 u64 remote_addr = op->op_remote_addr;
798 u32 pos, fr_pos; 799 u32 pos, fr_pos;
799 u32 work_alloc; 800 u32 work_alloc;
800 u32 i; 801 u32 i;
@@ -806,21 +807,21 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); 807 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
807 808
808 /* map the message the first time we see it */ 809 /* map the message the first time we see it */
809 if (!op->r_mapped) { 810 if (!op->op_mapped) {
810 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 811 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
811 op->r_sg, op->r_nents, (op->r_write) ? 812 op->op_sg, op->op_nents, (op->op_write) ?
812 DMA_TO_DEVICE : DMA_FROM_DEVICE); 813 DMA_TO_DEVICE : DMA_FROM_DEVICE);
813 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 814 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
814 if (op->r_count == 0) { 815 if (op->op_count == 0) {
815 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); 816 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
816 ret = -ENOMEM; /* XXX ? */ 817 ret = -ENOMEM; /* XXX ? */
817 goto out; 818 goto out;
818 } 819 }
819 820
820 op->r_mapped = 1; 821 op->op_mapped = 1;
821 } 822 }
822 823
823 if (!op->r_write) { 824 if (!op->op_write) {
824 /* Alloc space on the send queue for the fastreg */ 825 /* Alloc space on the send queue for the fastreg */
825 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos); 826 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
826 if (work_alloc != 1) { 827 if (work_alloc != 1) {
@@ -835,7 +836,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
835 * Instead of knowing how to return a partial rdma read/write we insist that there 836 * Instead of knowing how to return a partial rdma read/write we insist that there
836 * be enough work requests to send the entire message. 837 * be enough work requests to send the entire message.
837 */ 838 */
838 i = ceil(op->r_count, rds_iwdev->max_sge); 839 i = ceil(op->op_count, rds_iwdev->max_sge);
839 840
840 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); 841 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
841 if (work_alloc != i) { 842 if (work_alloc != i) {
@@ -846,17 +847,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
846 } 847 }
847 848
848 send = &ic->i_sends[pos]; 849 send = &ic->i_sends[pos];
849 if (!op->r_write) { 850 if (!op->op_write) {
850 first = prev = &ic->i_sends[fr_pos]; 851 first = prev = &ic->i_sends[fr_pos];
851 } else { 852 } else {
852 first = send; 853 first = send;
853 prev = NULL; 854 prev = NULL;
854 } 855 }
855 scat = &op->r_sg[0]; 856 scat = &op->op_sg[0];
856 sent = 0; 857 sent = 0;
857 num_sge = op->r_count; 858 num_sge = op->op_count;
858 859
859 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
860 send->s_wr.send_flags = 0; 861 send->s_wr.send_flags = 0;
861 send->s_queued = jiffies; 862 send->s_queued = jiffies;
862 863
@@ -873,13 +874,13 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
873 * for local access after RDS is finished with it, using 874 * for local access after RDS is finished with it, using
874 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. 875 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
875 */ 876 */
876 if (op->r_write) 877 if (op->op_write)
877 send->s_wr.opcode = IB_WR_RDMA_WRITE; 878 send->s_wr.opcode = IB_WR_RDMA_WRITE;
878 else 879 else
879 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 880 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
880 881
881 send->s_wr.wr.rdma.remote_addr = remote_addr; 882 send->s_wr.wr.rdma.remote_addr = remote_addr;
882 send->s_wr.wr.rdma.rkey = op->r_key; 883 send->s_wr.wr.rdma.rkey = op->op_rkey;
883 send->s_op = op; 884 send->s_op = op;
884 885
885 if (num_sge > rds_iwdev->max_sge) { 886 if (num_sge > rds_iwdev->max_sge) {
@@ -893,7 +894,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
893 if (prev) 894 if (prev)
894 prev->s_wr.next = &send->s_wr; 895 prev->s_wr.next = &send->s_wr;
895 896
896 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 897 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
897 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 898 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
898 899
899 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) 900 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
@@ -927,7 +928,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
927 } 928 }
928 929
929 /* if we finished the message then send completion owns it */ 930 /* if we finished the message then send completion owns it */
930 if (scat == &op->r_sg[op->r_count]) 931 if (scat == &op->op_sg[op->op_count])
931 first->s_wr.send_flags = IB_SEND_SIGNALED; 932 first->s_wr.send_flags = IB_SEND_SIGNALED;
932 933
933 if (i < work_alloc) { 934 if (i < work_alloc) {
@@ -941,9 +942,9 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
941 * adapters do not allow using the lkey for this at all. To bypass this use a 942 * adapters do not allow using the lkey for this at all. To bypass this use a
942 * fastreg_mr (or possibly a dma_mr) 943 * fastreg_mr (or possibly a dma_mr)
943 */ 944 */
944 if (!op->r_write) { 945 if (!op->op_write) {
945 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], 946 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
946 op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); 947 op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
947 work_alloc++; 948 work_alloc++;
948 } 949 }
949 950
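
The bulk of the iw_send.c churn is the switch from a separately allocated struct rds_rdma_op hung off rm->m_rdma_op to a struct rm_rdma_op embedded in the message and gated by an op_active flag, with fields renamed r_* to op_* and the data scatterlist moving to rm->data.op_sg. A simplified sketch of the new shape (hypothetical, trimmed to the fields used above):

#include <linux/scatterlist.h>
#include <linux/types.h>

struct rm_rdma_op_sketch {
	unsigned int		op_active:1;	/* replaces: rm->m_rdma_op != NULL */
	unsigned int		op_write:1;
	unsigned int		op_fence:1;
	unsigned int		op_mapped:1;
	u32			op_rkey;
	unsigned int		op_nents;
	unsigned int		op_count;	/* mapped SG count */
	struct scatterlist	*op_sg;
};

struct rm_data_op_sketch {
	unsigned int		op_nents;
	unsigned int		op_count;
	struct scatterlist	*op_sg;		/* was: rm->m_sg */
};
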
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 1c4428a61a02..e2e47176e729 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL;
55 55
56unsigned int rds_iw_sysctl_flow_control = 1; 56unsigned int rds_iw_sysctl_flow_control = 1;
57 57
58ctl_table rds_iw_sysctl_table[] = { 58static ctl_table rds_iw_sysctl_table[] = {
59 { 59 {
60 .procname = "max_send_wr", 60 .procname = "max_send_wr",
61 .data = &rds_iw_sysctl_max_send_wr, 61 .data = &rds_iw_sysctl_max_send_wr,
@@ -122,10 +122,10 @@ void rds_iw_sysctl_exit(void)
122 unregister_sysctl_table(rds_iw_sysctl_hdr); 122 unregister_sysctl_table(rds_iw_sysctl_hdr);
123} 123}
124 124
125int __init rds_iw_sysctl_init(void) 125int rds_iw_sysctl_init(void)
126{ 126{
127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table); 127 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
128 if (rds_iw_sysctl_hdr == NULL) 128 if (!rds_iw_sysctl_hdr)
129 return -ENOMEM; 129 return -ENOMEM;
130 return 0; 130 return 0;
131} 131}
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dd9879379457..aeec1d483b17 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,17 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
68 }
69
64 BUG_ON(hdr_off || sg || off); 70 BUG_ON(hdr_off || sg || off);
65 71
66 rds_inc_init(&rm->m_inc, conn, conn->c_laddr); 72 rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
67 rds_message_addref(rm); /* for the inc */ 73 /* For the embedded inc. Matching put is in loop_inc_free() */
74 rds_message_addref(rm);
68 75
69 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, 76 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
70 GFP_KERNEL, KM_USER0); 77 GFP_KERNEL, KM_USER0);
@@ -77,16 +84,14 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
77 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
78} 85}
79 86
80static int rds_loop_xmit_cong_map(struct rds_connection *conn, 87/*
81 struct rds_cong_map *map, 88 * See rds_loop_xmit(). Since our inc is embedded in the rm, we
82 unsigned long offset) 89 * make sure the rm lives at least until the inc is done.
90 */
91static void rds_loop_inc_free(struct rds_incoming *inc)
83{ 92{
84 BUG_ON(offset); 93 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
85 BUG_ON(map != conn->c_lcong); 94 rds_message_put(rm);
86
87 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
88
89 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
90} 95}
91 96
92/* we need to at least give the thread something to succeed */ 97/* we need to at least give the thread something to succeed */
@@ -112,7 +117,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
112 unsigned long flags; 117 unsigned long flags;
113 118
114 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); 119 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
115 if (lc == NULL) 120 if (!lc)
116 return -ENOMEM; 121 return -ENOMEM;
117 122
118 INIT_LIST_HEAD(&lc->loop_node); 123 INIT_LIST_HEAD(&lc->loop_node);
@@ -129,8 +134,12 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
129static void rds_loop_conn_free(void *arg) 134static void rds_loop_conn_free(void *arg)
130{ 135{
131 struct rds_loop_connection *lc = arg; 136 struct rds_loop_connection *lc = arg;
137 unsigned long flags;
138
132 rdsdebug("lc %p\n", lc); 139 rdsdebug("lc %p\n", lc);
140 spin_lock_irqsave(&loop_conns_lock, flags);
133 list_del(&lc->loop_node); 141 list_del(&lc->loop_node);
142 spin_unlock_irqrestore(&loop_conns_lock, flags);
134 kfree(lc); 143 kfree(lc);
135} 144}
136 145
@@ -169,14 +178,12 @@ void rds_loop_exit(void)
169 */ 178 */
170struct rds_transport rds_loop_transport = { 179struct rds_transport rds_loop_transport = {
171 .xmit = rds_loop_xmit, 180 .xmit = rds_loop_xmit,
172 .xmit_cong_map = rds_loop_xmit_cong_map,
173 .recv = rds_loop_recv, 181 .recv = rds_loop_recv,
174 .conn_alloc = rds_loop_conn_alloc, 182 .conn_alloc = rds_loop_conn_alloc,
175 .conn_free = rds_loop_conn_free, 183 .conn_free = rds_loop_conn_free,
176 .conn_connect = rds_loop_conn_connect, 184 .conn_connect = rds_loop_conn_connect,
177 .conn_shutdown = rds_loop_conn_shutdown, 185 .conn_shutdown = rds_loop_conn_shutdown,
178 .inc_copy_to_user = rds_message_inc_copy_to_user, 186 .inc_copy_to_user = rds_message_inc_copy_to_user,
179 .inc_purge = rds_message_inc_purge, 187 .inc_free = rds_loop_inc_free,
180 .inc_free = rds_message_inc_free,
181 .t_name = "loopback", 188 .t_name = "loopback",
182}; 189};
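
The new rds_loop_inc_free() above closes the lifetime loop: rds_loop_xmit() takes a reference on the rm for the embedded inc, and freeing the inc recovers the containing message with container_of() and drops that reference. The pairing, sketched with hypothetical types:

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/slab.h>

struct demo_inc {
	int i_dummy;
};

struct demo_msg {
	atomic_t	m_refcount;
	struct demo_inc	m_inc;		/* embedded, not pointed-to */
};

static void demo_msg_put(struct demo_msg *m)
{
	if (atomic_dec_and_test(&m->m_refcount))
		kfree(m);
}

static void demo_inc_free(struct demo_inc *inc)
{
	struct demo_msg *m = container_of(inc, struct demo_msg, m_inc);

	demo_msg_put(m);	/* matches the addref taken at xmit time */
}
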
diff --git a/net/rds/message.c b/net/rds/message.c
index 9a1d67e001ba..1fd3d29023d7 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -34,9 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38
39static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq);
40 37
41static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { 38static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
42[RDS_EXTHDR_NONE] = 0, 39[RDS_EXTHDR_NONE] = 0,
@@ -63,29 +60,31 @@ static void rds_message_purge(struct rds_message *rm)
63 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) 60 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
64 return; 61 return;
65 62
66 for (i = 0; i < rm->m_nents; i++) { 63 for (i = 0; i < rm->data.op_nents; i++) {
67 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i])); 64 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
68 /* XXX will have to put_page for page refs */ 65 /* XXX will have to put_page for page refs */
69 __free_page(sg_page(&rm->m_sg[i])); 66 __free_page(sg_page(&rm->data.op_sg[i]));
70 } 67 }
71 rm->m_nents = 0; 68 rm->data.op_nents = 0;
72 69
73 if (rm->m_rdma_op) 70 if (rm->rdma.op_active)
74 rds_rdma_free_op(rm->m_rdma_op); 71 rds_rdma_free_op(&rm->rdma);
75 if (rm->m_rdma_mr) 72 if (rm->rdma.op_rdma_mr)
76 rds_mr_put(rm->m_rdma_mr); 73 rds_mr_put(rm->rdma.op_rdma_mr);
77}
78 74
79void rds_message_inc_purge(struct rds_incoming *inc) 75 if (rm->atomic.op_active)
80{ 76 rds_atomic_free_op(&rm->atomic);
81 struct rds_message *rm = container_of(inc, struct rds_message, m_inc); 77 if (rm->atomic.op_rdma_mr)
82 rds_message_purge(rm); 78 rds_mr_put(rm->atomic.op_rdma_mr);
83} 79}
84 80
85void rds_message_put(struct rds_message *rm) 81void rds_message_put(struct rds_message *rm)
86{ 82{
87 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); 83 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
88 84 if (atomic_read(&rm->m_refcount) == 0) {
 85	printk(KERN_CRIT "danger refcount zero on %p\n", rm);
 86	WARN_ON(1);
87 }
89 if (atomic_dec_and_test(&rm->m_refcount)) { 88 if (atomic_dec_and_test(&rm->m_refcount)) {
90 BUG_ON(!list_empty(&rm->m_sock_item)); 89 BUG_ON(!list_empty(&rm->m_sock_item));
91 BUG_ON(!list_empty(&rm->m_conn_item)); 90 BUG_ON(!list_empty(&rm->m_conn_item));
@@ -96,12 +95,6 @@ void rds_message_put(struct rds_message *rm)
96} 95}
97EXPORT_SYMBOL_GPL(rds_message_put); 96EXPORT_SYMBOL_GPL(rds_message_put);
98 97
99void rds_message_inc_free(struct rds_incoming *inc)
100{
101 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
102 rds_message_put(rm);
103}
104
105void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 98void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
106 __be16 dport, u64 seq) 99 __be16 dport, u64 seq)
107{ 100{
@@ -113,8 +106,8 @@ void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
113} 106}
114EXPORT_SYMBOL_GPL(rds_message_populate_header); 107EXPORT_SYMBOL_GPL(rds_message_populate_header);
115 108
116int rds_message_add_extension(struct rds_header *hdr, 109int rds_message_add_extension(struct rds_header *hdr, unsigned int type,
117 unsigned int type, const void *data, unsigned int len) 110 const void *data, unsigned int len)
118{ 111{
119 unsigned int ext_len = sizeof(u8) + len; 112 unsigned int ext_len = sizeof(u8) + len;
120 unsigned char *dst; 113 unsigned char *dst;
@@ -184,26 +177,6 @@ none:
184 return RDS_EXTHDR_NONE; 177 return RDS_EXTHDR_NONE;
185} 178}
186 179
187int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version)
188{
189 struct rds_ext_header_version ext_hdr;
190
191 ext_hdr.h_version = cpu_to_be32(version);
192 return rds_message_add_extension(hdr, RDS_EXTHDR_VERSION, &ext_hdr, sizeof(ext_hdr));
193}
194
195int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version)
196{
197 struct rds_ext_header_version ext_hdr;
198 unsigned int pos = 0, len = sizeof(ext_hdr);
199
200 /* We assume the version extension is the only one present */
201 if (rds_message_next_extension(hdr, &pos, &ext_hdr, &len) != RDS_EXTHDR_VERSION)
202 return 0;
203 *version = be32_to_cpu(ext_hdr.h_version);
204 return 1;
205}
206
207int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset) 180int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset)
208{ 181{
209 struct rds_ext_header_rdma_dest ext_hdr; 182 struct rds_ext_header_rdma_dest ext_hdr;
@@ -214,41 +187,75 @@ int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 o
214} 187}
215EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension); 188EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
216 189
217struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp) 190/*
191 * Each rds_message is allocated with extra space for the scatterlist entries
 192 * rds ops will need, which keeps the number of allocations down. Then, each rds op
193 * can grab SGs when initializing its part of the rds_message.
194 */
195struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
218{ 196{
219 struct rds_message *rm; 197 struct rds_message *rm;
220 198
221 rm = kzalloc(sizeof(struct rds_message) + 199 rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
222 (nents * sizeof(struct scatterlist)), gfp);
223 if (!rm) 200 if (!rm)
224 goto out; 201 goto out;
225 202
226 if (nents) 203 rm->m_used_sgs = 0;
227 sg_init_table(rm->m_sg, nents); 204 rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
205
228 atomic_set(&rm->m_refcount, 1); 206 atomic_set(&rm->m_refcount, 1);
229 INIT_LIST_HEAD(&rm->m_sock_item); 207 INIT_LIST_HEAD(&rm->m_sock_item);
230 INIT_LIST_HEAD(&rm->m_conn_item); 208 INIT_LIST_HEAD(&rm->m_conn_item);
231 spin_lock_init(&rm->m_rs_lock); 209 spin_lock_init(&rm->m_rs_lock);
210 init_waitqueue_head(&rm->m_flush_wait);
232 211
233out: 212out:
234 return rm; 213 return rm;
235} 214}
236 215
216/*
217 * RDS ops use this to grab SG entries from the rm's sg pool.
218 */
219struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
220{
221 struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
222 struct scatterlist *sg_ret;
223
224 WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
225 WARN_ON(!nents);
226
227 if (rm->m_used_sgs + nents > rm->m_total_sgs)
228 return NULL;
229
230 sg_ret = &sg_first[rm->m_used_sgs];
231 sg_init_table(sg_ret, nents);
232 rm->m_used_sgs += nents;
233
234 return sg_ret;
235}
236
237struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) 237struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
238{ 238{
239 struct rds_message *rm; 239 struct rds_message *rm;
240 unsigned int i; 240 unsigned int i;
241 int num_sgs = ceil(total_len, PAGE_SIZE);
242 int extra_bytes = num_sgs * sizeof(struct scatterlist);
241 243
242 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); 244 rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
243 if (rm == NULL) 245 if (!rm)
244 return ERR_PTR(-ENOMEM); 246 return ERR_PTR(-ENOMEM);
245 247
246 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); 248 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
247 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 249 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
248 rm->m_nents = ceil(total_len, PAGE_SIZE); 250 rm->data.op_nents = ceil(total_len, PAGE_SIZE);
251 rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
252 if (!rm->data.op_sg) {
253 rds_message_put(rm);
254 return ERR_PTR(-ENOMEM);
255 }
249 256
250 for (i = 0; i < rm->m_nents; ++i) { 257 for (i = 0; i < rm->data.op_nents; ++i) {
251 sg_set_page(&rm->m_sg[i], 258 sg_set_page(&rm->data.op_sg[i],
252 virt_to_page(page_addrs[i]), 259 virt_to_page(page_addrs[i]),
253 PAGE_SIZE, 0); 260 PAGE_SIZE, 0);
254 } 261 }
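
rds_message_alloc() now takes a byte count for a scatterlist pool placed directly after the struct (hence the &rm[1] in rds_message_alloc_sgs()), and each op carves its entries out of that pool, so one kzalloc covers the message and every op's SG array. A hypothetical caller, assuming non-zero SG counts and the declarations shown above:

static int demo_build_message(unsigned int data_sgs, unsigned int rdma_sgs)
{
	unsigned int total = data_sgs + rdma_sgs;
	struct rds_message *rm;

	rm = rds_message_alloc(total * sizeof(struct scatterlist),
			       GFP_KERNEL);
	if (!rm)
		return -ENOMEM;

	rm->data.op_sg = rds_message_alloc_sgs(rm, data_sgs);
	rm->rdma.op_sg = rds_message_alloc_sgs(rm, rdma_sgs);
	if (!rm->data.op_sg || !rm->rdma.op_sg) {
		rds_message_put(rm);	/* frees rm and the pool together */
		return -ENOMEM;
	}
	/* ... fill the SG entries and queue the message ... */
	return 0;
}
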
@@ -256,40 +263,33 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
256 return rm; 263 return rm;
257} 264}
258 265
259struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 266int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
260 size_t total_len) 267 size_t total_len)
261{ 268{
262 unsigned long to_copy; 269 unsigned long to_copy;
263 unsigned long iov_off; 270 unsigned long iov_off;
264 unsigned long sg_off; 271 unsigned long sg_off;
265 struct rds_message *rm;
266 struct iovec *iov; 272 struct iovec *iov;
267 struct scatterlist *sg; 273 struct scatterlist *sg;
268 int ret; 274 int ret = 0;
269
270 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
271 if (rm == NULL) {
272 ret = -ENOMEM;
273 goto out;
274 }
275 275
276 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 276 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
277 277
278 /* 278 /*
279 * now allocate and copy in the data payload. 279 * now allocate and copy in the data payload.
280 */ 280 */
281 sg = rm->m_sg; 281 sg = rm->data.op_sg;
282 iov = first_iov; 282 iov = first_iov;
283 iov_off = 0; 283 iov_off = 0;
284 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ 284 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
285 285
286 while (total_len) { 286 while (total_len) {
287 if (sg_page(sg) == NULL) { 287 if (!sg_page(sg)) {
288 ret = rds_page_remainder_alloc(sg, total_len, 288 ret = rds_page_remainder_alloc(sg, total_len,
289 GFP_HIGHUSER); 289 GFP_HIGHUSER);
290 if (ret) 290 if (ret)
291 goto out; 291 goto out;
292 rm->m_nents++; 292 rm->data.op_nents++;
293 sg_off = 0; 293 sg_off = 0;
294 } 294 }
295 295
@@ -320,14 +320,8 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
320 sg++; 320 sg++;
321 } 321 }
322 322
323 ret = 0;
324out: 323out:
325 if (ret) { 324 return ret;
326 if (rm)
327 rds_message_put(rm);
328 rm = ERR_PTR(ret);
329 }
330 return rm;
331} 325}
332 326
333int rds_message_inc_copy_to_user(struct rds_incoming *inc, 327int rds_message_inc_copy_to_user(struct rds_incoming *inc,
@@ -348,7 +342,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
348 342
349 iov = first_iov; 343 iov = first_iov;
350 iov_off = 0; 344 iov_off = 0;
351 sg = rm->m_sg; 345 sg = rm->data.op_sg;
352 vec_off = 0; 346 vec_off = 0;
353 copied = 0; 347 copied = 0;
354 348
@@ -394,15 +388,14 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
394 */ 388 */
395void rds_message_wait(struct rds_message *rm) 389void rds_message_wait(struct rds_message *rm)
396{ 390{
397 wait_event(rds_message_flush_waitq, 391 wait_event_interruptible(rm->m_flush_wait,
398 !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); 392 !test_bit(RDS_MSG_MAPPED, &rm->m_flags));
399} 393}
400 394
401void rds_message_unmapped(struct rds_message *rm) 395void rds_message_unmapped(struct rds_message *rm)
402{ 396{
403 clear_bit(RDS_MSG_MAPPED, &rm->m_flags); 397 clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
404 if (waitqueue_active(&rds_message_flush_waitq)) 398 wake_up_interruptible(&rm->m_flush_wait);
405 wake_up(&rds_message_flush_waitq);
406} 399}
407EXPORT_SYMBOL_GPL(rds_message_unmapped); 400EXPORT_SYMBOL_GPL(rds_message_unmapped);
408 401
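
The global rds_message_flush_waitq is gone: each message now carries its own m_flush_wait (initialized in rds_message_alloc() above), waiters use wait_event_interruptible(), and the waker drops the waitqueue_active() guard, which generally needs a memory barrier to be safe anyway. The pattern, sketched on a hypothetical object:

#include <linux/bitops.h>
#include <linux/wait.h>

#define DEMO_MAPPED	0

struct demo_obj {
	unsigned long		flags;
	wait_queue_head_t	flush_wait;	/* init_waitqueue_head() at alloc */
};

static void demo_wait_unmapped(struct demo_obj *o)
{
	wait_event_interruptible(o->flush_wait,
				 !test_bit(DEMO_MAPPED, &o->flags));
}

static void demo_unmapped(struct demo_obj *o)
{
	clear_bit(DEMO_MAPPED, &o->flags);
	wake_up_interruptible(&o->flush_wait);	/* no waitqueue_active() guard */
}
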
diff --git a/net/rds/page.c b/net/rds/page.c
index 1dfbfea12e9b..d8acdebe3c7c 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -40,7 +40,8 @@ struct rds_page_remainder {
40 unsigned long r_offset; 40 unsigned long r_offset;
41}; 41};
42 42
43DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); 43static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder,
44 rds_page_remainders);
44 45
45/* 46/*
46 * returns 0 on success or -errno on failure. 47 * returns 0 on success or -errno on failure.
@@ -103,7 +104,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
103 /* jump straight to allocation if we're trying for a huge page */ 104 /* jump straight to allocation if we're trying for a huge page */
104 if (bytes >= PAGE_SIZE) { 105 if (bytes >= PAGE_SIZE) {
105 page = alloc_page(gfp); 106 page = alloc_page(gfp);
106 if (page == NULL) { 107 if (!page) {
107 ret = -ENOMEM; 108 ret = -ENOMEM;
108 } else { 109 } else {
109 sg_set_page(scat, page, PAGE_SIZE, 0); 110 sg_set_page(scat, page, PAGE_SIZE, 0);
@@ -149,7 +150,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
149 rem = &per_cpu(rds_page_remainders, get_cpu()); 150 rem = &per_cpu(rds_page_remainders, get_cpu());
150 local_irq_save(flags); 151 local_irq_save(flags);
151 152
152 if (page == NULL) { 153 if (!page) {
153 ret = -ENOMEM; 154 ret = -ENOMEM;
154 break; 155 break;
155 } 156 }
@@ -173,6 +174,7 @@ out:
173 ret ? 0 : scat->length); 174 ret ? 0 : scat->length);
174 return ret; 175 return ret;
175} 176}
177EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
176 178
177static int rds_page_remainder_cpu_notify(struct notifier_block *self, 179static int rds_page_remainder_cpu_notify(struct notifier_block *self,
178 unsigned long action, void *hcpu) 180 unsigned long action, void *hcpu)
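
The EXPORT_SYMBOL_GPL() added above is what lets rds_page_remainder_alloc() be called from code built as a separate module, presumably part of the transport split elsewhere in this series; without an export a symbol stays invisible to other modules even if declared in a shared header. Minimal sketch (hypothetical helper):

#include <linux/module.h>

int demo_shared_helper(int x)
{
	return x + 1;
}
EXPORT_SYMBOL_GPL(demo_shared_helper);	/* visible to GPL-compatible modules only */

MODULE_LICENSE("GPL");
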
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 75fd13bb631b..8920f2a83327 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -35,7 +35,7 @@
35#include <linux/rbtree.h> 35#include <linux/rbtree.h>
36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */ 36#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
37 37
38#include "rdma.h" 38#include "rds.h"
39 39
40/* 40/*
41 * XXX 41 * XXX
@@ -130,14 +130,22 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
130{ 130{
131 struct rds_mr *mr; 131 struct rds_mr *mr;
132 struct rb_node *node; 132 struct rb_node *node;
133 unsigned long flags;
133 134
134 /* Release any MRs associated with this socket */ 135 /* Release any MRs associated with this socket */
136 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
135 while ((node = rb_first(&rs->rs_rdma_keys))) { 137 while ((node = rb_first(&rs->rs_rdma_keys))) {
136 mr = container_of(node, struct rds_mr, r_rb_node); 138 mr = container_of(node, struct rds_mr, r_rb_node);
137 if (mr->r_trans == rs->rs_transport) 139 if (mr->r_trans == rs->rs_transport)
138 mr->r_invalidate = 0; 140 mr->r_invalidate = 0;
141 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
142 RB_CLEAR_NODE(&mr->r_rb_node);
143 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
144 rds_destroy_mr(mr);
139 rds_mr_put(mr); 145 rds_mr_put(mr);
146 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
140 } 147 }
148 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
141 149
142 if (rs->rs_transport && rs->rs_transport->flush_mrs) 150 if (rs->rs_transport && rs->rs_transport->flush_mrs)
143 rs->rs_transport->flush_mrs(); 151 rs->rs_transport->flush_mrs();
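
rds_rdma_drop_keys() above now actually holds rs_rdma_lock while walking the tree, erasing each node under the lock but dropping the lock around rds_destroy_mr(), which may block; restarting from rb_first() after reacquiring keeps the walk valid. The drop-and-reacquire pattern, sketched with hypothetical types:

#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_item {
	struct rb_node	d_rb;
};

static void demo_item_destroy(struct demo_item *it)	/* may sleep */
{
	kfree(it);
}

static void demo_drop_all(struct rb_root *root, spinlock_t *lock)
{
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	while ((node = rb_first(root))) {
		struct demo_item *it =
			rb_entry(node, struct demo_item, d_rb);

		rb_erase(node, root);
		RB_CLEAR_NODE(node);
		/* destructor may sleep: drop the lock around it */
		spin_unlock_irqrestore(lock, flags);
		demo_item_destroy(it);
		spin_lock_irqsave(lock, flags);
	}
	spin_unlock_irqrestore(lock, flags);
}
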
@@ -181,7 +189,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
181 goto out; 189 goto out;
182 } 190 }
183 191
184 if (rs->rs_transport->get_mr == NULL) { 192 if (!rs->rs_transport->get_mr) {
185 ret = -EOPNOTSUPP; 193 ret = -EOPNOTSUPP;
186 goto out; 194 goto out;
187 } 195 }
@@ -197,13 +205,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
197 205
198 /* XXX clamp nr_pages to limit the size of this alloc? */ 206 /* XXX clamp nr_pages to limit the size of this alloc? */
199 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 207 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
200 if (pages == NULL) { 208 if (!pages) {
201 ret = -ENOMEM; 209 ret = -ENOMEM;
202 goto out; 210 goto out;
203 } 211 }
204 212
205 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL); 213 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
206 if (mr == NULL) { 214 if (!mr) {
207 ret = -ENOMEM; 215 ret = -ENOMEM;
208 goto out; 216 goto out;
209 } 217 }
@@ -230,13 +238,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
230 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to 238 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
231 * the zero page. 239 * the zero page.
232 */ 240 */
233 ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1); 241 ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
234 if (ret < 0) 242 if (ret < 0)
235 goto out; 243 goto out;
236 244
237 nents = ret; 245 nents = ret;
238 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); 246 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
239 if (sg == NULL) { 247 if (!sg) {
240 ret = -ENOMEM; 248 ret = -ENOMEM;
241 goto out; 249 goto out;
242 } 250 }
@@ -406,68 +414,153 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
406 414
407 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 415 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
408 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 416 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
409 if (mr && (mr->r_use_once || force)) { 417 if (!mr) {
418 printk(KERN_ERR "rds: trying to unuse MR with unknown r_key %u!\n", r_key);
419 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
420 return;
421 }
422
423 if (mr->r_use_once || force) {
410 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); 424 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
411 RB_CLEAR_NODE(&mr->r_rb_node); 425 RB_CLEAR_NODE(&mr->r_rb_node);
412 zot_me = 1; 426 zot_me = 1;
413 } else if (mr) 427 }
414 atomic_inc(&mr->r_refcount);
415 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); 428 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
416 429
417 /* May have to issue a dma_sync on this memory region. 430 /* May have to issue a dma_sync on this memory region.
418 * Note we could avoid this if the operation was a RDMA READ, 431 * Note we could avoid this if the operation was a RDMA READ,
419 * but at this point we can't tell. */ 432 * but at this point we can't tell. */
420 if (mr != NULL) { 433 if (mr->r_trans->sync_mr)
421 if (mr->r_trans->sync_mr) 434 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
422 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); 435
423 436 /* If the MR was marked as invalidate, this will
424 /* If the MR was marked as invalidate, this will 437 * trigger an async flush. */
425 * trigger an async flush. */ 438 if (zot_me)
426 if (zot_me) 439 rds_destroy_mr(mr);
427 rds_destroy_mr(mr); 440 rds_mr_put(mr);
428 rds_mr_put(mr);
429 }
430} 441}
431 442
432void rds_rdma_free_op(struct rds_rdma_op *ro) 443void rds_rdma_free_op(struct rm_rdma_op *ro)
433{ 444{
434 unsigned int i; 445 unsigned int i;
435 446
436 for (i = 0; i < ro->r_nents; i++) { 447 for (i = 0; i < ro->op_nents; i++) {
437 struct page *page = sg_page(&ro->r_sg[i]); 448 struct page *page = sg_page(&ro->op_sg[i]);
438 449
439 /* Mark page dirty if it was possibly modified, which 450 /* Mark page dirty if it was possibly modified, which
440 * is the case for a RDMA_READ which copies from remote 451 * is the case for a RDMA_READ which copies from remote
441 * to local memory */ 452 * to local memory */
442 if (!ro->r_write) { 453 if (!ro->op_write) {
443 BUG_ON(in_interrupt()); 454 BUG_ON(irqs_disabled());
444 set_page_dirty(page); 455 set_page_dirty(page);
445 } 456 }
446 put_page(page); 457 put_page(page);
447 } 458 }
448 459
449 kfree(ro->r_notifier); 460 kfree(ro->op_notifier);
450 kfree(ro); 461 ro->op_notifier = NULL;
462 ro->op_active = 0;
463}
464
465void rds_atomic_free_op(struct rm_atomic_op *ao)
466{
467 struct page *page = sg_page(ao->op_sg);
468
469 /* Mark page dirty if it was possibly modified, which
470 * is the case for a RDMA_READ which copies from remote
471 * to local memory */
472 set_page_dirty(page);
473 put_page(page);
474
475 kfree(ao->op_notifier);
476 ao->op_notifier = NULL;
477 ao->op_active = 0;
451} 478}
452 479
480
453/* 481/*
454 * args is a pointer to an in-kernel copy in the sendmsg cmsg. 482 * Count the number of pages needed to describe an incoming iovec array.
455 */ 483 */
456static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, 484static int rds_rdma_pages(struct rds_iovec iov[], int nr_iovecs)
457 struct rds_rdma_args *args) 485{
486 int tot_pages = 0;
487 unsigned int nr_pages;
488 unsigned int i;
489
490 /* figure out the number of pages in the vector */
491 for (i = 0; i < nr_iovecs; i++) {
492 nr_pages = rds_pages_in_vec(&iov[i]);
493 if (nr_pages == 0)
494 return -EINVAL;
495
496 tot_pages += nr_pages;
497
498 /*
499 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
500 * so tot_pages cannot overflow without first going negative.
501 */
502 if (tot_pages < 0)
503 return -EINVAL;
504 }
505
506 return tot_pages;
507}
508
509int rds_rdma_extra_size(struct rds_rdma_args *args)
458{ 510{
459 struct rds_iovec vec; 511 struct rds_iovec vec;
460 struct rds_rdma_op *op = NULL; 512 struct rds_iovec __user *local_vec;
513 int tot_pages = 0;
461 unsigned int nr_pages; 514 unsigned int nr_pages;
462 unsigned int max_pages; 515 unsigned int i;
516
517 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
518
519 /* figure out the number of pages in the vector */
520 for (i = 0; i < args->nr_local; i++) {
521 if (copy_from_user(&vec, &local_vec[i],
522 sizeof(struct rds_iovec)))
523 return -EFAULT;
524
525 nr_pages = rds_pages_in_vec(&vec);
526 if (nr_pages == 0)
527 return -EINVAL;
528
529 tot_pages += nr_pages;
530
531 /*
532 * nr_pages for one entry is limited to (UINT_MAX>>PAGE_SHIFT)+1,
533 * so tot_pages cannot overflow without first going negative.
534 */
535 if (tot_pages < 0)
536 return -EINVAL;
537 }
538
539 return tot_pages * sizeof(struct scatterlist);
540}
541
542/*
543 * The application asks for a RDMA transfer.
544 * Extract all arguments and set up the rdma_op
545 */
546int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
547 struct cmsghdr *cmsg)
548{
549 struct rds_rdma_args *args;
550 struct rm_rdma_op *op = &rm->rdma;
551 int nr_pages;
463 unsigned int nr_bytes; 552 unsigned int nr_bytes;
464 struct page **pages = NULL; 553 struct page **pages = NULL;
465 struct rds_iovec __user *local_vec; 554 struct rds_iovec iovstack[UIO_FASTIOV], *iovs = iovstack;
466 struct scatterlist *sg; 555 int iov_size;
467 unsigned int nr;
468 unsigned int i, j; 556 unsigned int i, j;
469 int ret; 557 int ret = 0;
558
559 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
560 || rm->rdma.op_active)
561 return -EINVAL;
470 562
563 args = CMSG_DATA(cmsg);
471 564
472 if (rs->rs_bound_addr == 0) { 565 if (rs->rs_bound_addr == 0) {
473 ret = -ENOTCONN; /* XXX not a great errno */ 566 ret = -ENOTCONN; /* XXX not a great errno */
@@ -479,61 +572,59 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
479 goto out; 572 goto out;
480 } 573 }
481 574
482 nr_pages = 0; 575 /* Check whether to allocate the iovec area */
483 max_pages = 0; 576 iov_size = args->nr_local * sizeof(struct rds_iovec);
484 577 if (args->nr_local > UIO_FASTIOV) {
485 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; 578 iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL);
486 579 if (!iovs) {
487 /* figure out the number of pages in the vector */ 580 ret = -ENOMEM;
488 for (i = 0; i < args->nr_local; i++) {
489 if (copy_from_user(&vec, &local_vec[i],
490 sizeof(struct rds_iovec))) {
491 ret = -EFAULT;
492 goto out;
493 }
494
495 nr = rds_pages_in_vec(&vec);
496 if (nr == 0) {
497 ret = -EINVAL;
498 goto out; 581 goto out;
499 } 582 }
583 }
500 584
501 max_pages = max(nr, max_pages); 585 if (copy_from_user(iovs, (struct rds_iovec __user *)(unsigned long) args->local_vec_addr, iov_size)) {
502 nr_pages += nr; 586 ret = -EFAULT;
587 goto out;
503 } 588 }
504 589
505 pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL); 590 nr_pages = rds_rdma_pages(iovs, args->nr_local);
506 if (pages == NULL) { 591 if (nr_pages < 0) {
507 ret = -ENOMEM; 592 ret = -EINVAL;
508 goto out; 593 goto out;
509 } 594 }
510 595
511 op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL); 596 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
512 if (op == NULL) { 597 if (!pages) {
513 ret = -ENOMEM; 598 ret = -ENOMEM;
514 goto out; 599 goto out;
515 } 600 }
516 601
517 op->r_write = !!(args->flags & RDS_RDMA_READWRITE); 602 op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
518 op->r_fence = !!(args->flags & RDS_RDMA_FENCE); 603 op->op_fence = !!(args->flags & RDS_RDMA_FENCE);
519 op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); 604 op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
520 op->r_recverr = rs->rs_recverr; 605 op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
606 op->op_active = 1;
607 op->op_recverr = rs->rs_recverr;
521 WARN_ON(!nr_pages); 608 WARN_ON(!nr_pages);
522 sg_init_table(op->r_sg, nr_pages); 609 op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
610 if (!op->op_sg) {
611 ret = -ENOMEM;
612 goto out;
613 }
523 614
524 if (op->r_notify || op->r_recverr) { 615 if (op->op_notify || op->op_recverr) {
525 /* We allocate an uninitialized notifier here, because 616 /* We allocate an uninitialized notifier here, because
526 * we don't want to do that in the completion handler. We 617 * we don't want to do that in the completion handler. We
527 * would have to use GFP_ATOMIC there, and don't want to deal 618 * would have to use GFP_ATOMIC there, and don't want to deal
528 * with failed allocations. 619 * with failed allocations.
529 */ 620 */
530 op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL); 621 op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
531 if (!op->r_notifier) { 622 if (!op->op_notifier) {
532 ret = -ENOMEM; 623 ret = -ENOMEM;
533 goto out; 624 goto out;
534 } 625 }
535 op->r_notifier->n_user_token = args->user_token; 626 op->op_notifier->n_user_token = args->user_token;
536 op->r_notifier->n_status = RDS_RDMA_SUCCESS; 627 op->op_notifier->n_status = RDS_RDMA_SUCCESS;
537 } 628 }
538 629
539 /* The cookie contains the R_Key of the remote memory region, and 630 /* The cookie contains the R_Key of the remote memory region, and
@@ -543,68 +634,55 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
543 * destination address (which is really an offset into the MR) 634 * destination address (which is really an offset into the MR)
544 * FIXME: We may want to move this into ib_rdma.c 635 * FIXME: We may want to move this into ib_rdma.c
545 */ 636 */
546 op->r_key = rds_rdma_cookie_key(args->cookie); 637 op->op_rkey = rds_rdma_cookie_key(args->cookie);
547 op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie); 638 op->op_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
548 639
549 nr_bytes = 0; 640 nr_bytes = 0;
550 641
551 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n", 642 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
552 (unsigned long long)args->nr_local, 643 (unsigned long long)args->nr_local,
553 (unsigned long long)args->remote_vec.addr, 644 (unsigned long long)args->remote_vec.addr,
554 op->r_key); 645 op->op_rkey);
555 646
556 for (i = 0; i < args->nr_local; i++) { 647 for (i = 0; i < args->nr_local; i++) {
557 if (copy_from_user(&vec, &local_vec[i], 648 struct rds_iovec *iov = &iovs[i];
558 sizeof(struct rds_iovec))) { 649 /* don't need to check, rds_rdma_pages() verified nr is nonzero */
559 ret = -EFAULT; 650 unsigned int nr = rds_pages_in_vec(iov);
560 goto out;
561 }
562 651
563 nr = rds_pages_in_vec(&vec); 652 rs->rs_user_addr = iov->addr;
564 if (nr == 0) { 653 rs->rs_user_bytes = iov->bytes;
565 ret = -EINVAL;
566 goto out;
567 }
568 654
569 rs->rs_user_addr = vec.addr;
570 rs->rs_user_bytes = vec.bytes;
571
572 /* did the user change the vec under us? */
573 if (nr > max_pages || op->r_nents + nr > nr_pages) {
574 ret = -EINVAL;
575 goto out;
576 }
577 /* If it's a WRITE operation, we want to pin the pages for reading. 655 /* If it's a WRITE operation, we want to pin the pages for reading.
578 * If it's a READ operation, we need to pin the pages for writing. 656 * If it's a READ operation, we need to pin the pages for writing.
579 */ 657 */
580 ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write); 658 ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
581 if (ret < 0) 659 if (ret < 0)
582 goto out; 660 goto out;
583 661
584 rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", 662 rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
585 nr_bytes, nr, vec.bytes, vec.addr); 663 nr_bytes, nr, iov->bytes, iov->addr);
586 664
587 nr_bytes += vec.bytes; 665 nr_bytes += iov->bytes;
588 666
589 for (j = 0; j < nr; j++) { 667 for (j = 0; j < nr; j++) {
590 unsigned int offset = vec.addr & ~PAGE_MASK; 668 unsigned int offset = iov->addr & ~PAGE_MASK;
669 struct scatterlist *sg;
591 670
592 sg = &op->r_sg[op->r_nents + j]; 671 sg = &op->op_sg[op->op_nents + j];
593 sg_set_page(sg, pages[j], 672 sg_set_page(sg, pages[j],
594 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), 673 min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
595 offset); 674 offset);
596 675
597 rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", 676 rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
598 sg->offset, sg->length, vec.addr, vec.bytes); 677 sg->offset, sg->length, iov->addr, iov->bytes);
599 678
600 vec.addr += sg->length; 679 iov->addr += sg->length;
601 vec.bytes -= sg->length; 680 iov->bytes -= sg->length;
602 } 681 }
603 682
604 op->r_nents += nr; 683 op->op_nents += nr;
605 } 684 }
606 685
607
608 if (nr_bytes > args->remote_vec.bytes) { 686 if (nr_bytes > args->remote_vec.bytes) {
609 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n", 687 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
610 nr_bytes, 688 nr_bytes,
@@ -612,38 +690,18 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
612 ret = -EINVAL; 690 ret = -EINVAL;
613 goto out; 691 goto out;
614 } 692 }
615 op->r_bytes = nr_bytes; 693 op->op_bytes = nr_bytes;
616 694
617 ret = 0;
618out: 695out:
696 if (iovs != iovstack)
697 sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size);
619 kfree(pages); 698 kfree(pages);
620 if (ret) { 699 if (ret)
621 if (op) 700 rds_rdma_free_op(op);
622 rds_rdma_free_op(op); 701 else
623 op = ERR_PTR(ret); 702 rds_stats_inc(s_send_rdma);
624 }
625 return op;
626}
627
628/*
629 * The application asks for a RDMA transfer.
630 * Extract all arguments and set up the rdma_op
631 */
632int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
633 struct cmsghdr *cmsg)
634{
635 struct rds_rdma_op *op;
636
637 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
638 rm->m_rdma_op != NULL)
639 return -EINVAL;
640 703
641 op = rds_rdma_prepare(rs, CMSG_DATA(cmsg)); 704 return ret;
642 if (IS_ERR(op))
643 return PTR_ERR(op);
644 rds_stats_inc(s_send_rdma);
645 rm->m_rdma_op = op;
646 return 0;
647} 705}
648 706
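The j-loop above is the heart of the conversion: one user iovec becomes as many scatterlist entries as it touches pages, each clipped so it never crosses a page boundary. The same walk in isolation (names are illustrative; pages[] is assumed to have been filled by rds_pin_pages()):

static void example_fill_sg(struct scatterlist *sg, struct page **pages,
			    unsigned int nr, u64 addr, u64 bytes)
{
	unsigned int j;

	for (j = 0; j < nr; j++) {
		unsigned int off = addr & ~PAGE_MASK;	/* offset in page */
		unsigned int len = min_t(u64, bytes, PAGE_SIZE - off);

		sg_set_page(&sg[j], pages[j], len, off);
		addr += len;		/* advance past this fragment */
		bytes -= len;
	}
}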
649/* 707/*
@@ -673,7 +731,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
673 731
674 spin_lock_irqsave(&rs->rs_rdma_lock, flags); 732 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
675 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 733 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
676 if (mr == NULL) 734 if (!mr)
677 err = -EINVAL; /* invalid r_key */ 735 err = -EINVAL; /* invalid r_key */
678 else 736 else
679 atomic_inc(&mr->r_refcount); 737 atomic_inc(&mr->r_refcount);
@@ -681,7 +739,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
681 739
682 if (mr) { 740 if (mr) {
683 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); 741 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
684 rm->m_rdma_mr = mr; 742 rm->rdma.op_rdma_mr = mr;
685 } 743 }
686 return err; 744 return err;
687} 745}
@@ -699,5 +757,102 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
699 rm->m_rdma_cookie != 0) 757 rm->m_rdma_cookie != 0)
700 return -EINVAL; 758 return -EINVAL;
701 759
702 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); 760 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
761}
762
763/*
764 * Fill in rds_message for an atomic request.
765 */
766int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
767 struct cmsghdr *cmsg)
768{
769 struct page *page = NULL;
770 struct rds_atomic_args *args;
771 int ret = 0;
772
773 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
774 || rm->atomic.op_active)
775 return -EINVAL;
776
777 args = CMSG_DATA(cmsg);
778
779 /* Nonmasked & masked cmsg ops converted to masked hw ops */
780 switch (cmsg->cmsg_type) {
781 case RDS_CMSG_ATOMIC_FADD:
782 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
783 rm->atomic.op_m_fadd.add = args->fadd.add;
784 rm->atomic.op_m_fadd.nocarry_mask = 0;
785 break;
786 case RDS_CMSG_MASKED_ATOMIC_FADD:
787 rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
788 rm->atomic.op_m_fadd.add = args->m_fadd.add;
789 rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask;
790 break;
791 case RDS_CMSG_ATOMIC_CSWP:
792 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
793 rm->atomic.op_m_cswp.compare = args->cswp.compare;
794 rm->atomic.op_m_cswp.swap = args->cswp.swap;
795 rm->atomic.op_m_cswp.compare_mask = ~0;
796 rm->atomic.op_m_cswp.swap_mask = ~0;
797 break;
798 case RDS_CMSG_MASKED_ATOMIC_CSWP:
799 rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
800 rm->atomic.op_m_cswp.compare = args->m_cswp.compare;
801 rm->atomic.op_m_cswp.swap = args->m_cswp.swap;
802 rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask;
803 rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask;
804 break;
805 default:
806 BUG(); /* should never happen */
807 }
808
809 rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
810 rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
811 rm->atomic.op_active = 1;
812 rm->atomic.op_recverr = rs->rs_recverr;
813 rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
814 if (!rm->atomic.op_sg) {
815 ret = -ENOMEM;
816 goto err;
817 }
818
819 /* verify 8 byte-aligned */
820 if (args->local_addr & 0x7) {
821 ret = -EFAULT;
822 goto err;
823 }
824
825 ret = rds_pin_pages(args->local_addr, 1, &page, 1);
826 if (ret != 1)
827 goto err;
828 ret = 0;
829
830 sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
831
832 if (rm->atomic.op_notify || rm->atomic.op_recverr) {
833 /* We allocate an uninitialized notifier here, because
834 * we don't want to do that in the completion handler. We
835 * would have to use GFP_ATOMIC there, and don't want to deal
836 * with failed allocations.
837 */
838 rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
839 if (!rm->atomic.op_notifier) {
840 ret = -ENOMEM;
841 goto err;
842 }
843
844 rm->atomic.op_notifier->n_user_token = args->user_token;
845 rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
846 }
847
848 rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
849 rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
850
851 return ret;
852err:
853 if (page)
854 put_page(page);
855 kfree(rm->atomic.op_notifier);
856
857 return ret;
703} 858}
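For orientation, what the sender side of this cmsg might look like from userspace. This is a sketch only: it assumes the struct rds_atomic_args layout used above and the SOL_RDS/RDS_CMSG_ATOMIC_CSWP constants from this series' uapi additions, with the msghdr, socket, and MR-cookie setup elided.

struct rds_atomic_args args = {
	.cookie      = cookie,		/* from an earlier RDS_CMSG_RDMA_MAP */
	.local_addr  = (uint64_t)(unsigned long)&local_result,	/* 8-byte aligned */
	.remote_addr = remote_offset,
	.cswp        = { .compare = old_val, .swap = new_val },
	.flags       = RDS_RDMA_NOTIFY_ME,
	.user_token  = token,
};
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

cmsg->cmsg_level = SOL_RDS;
cmsg->cmsg_type  = RDS_CMSG_ATOMIC_CSWP;
cmsg->cmsg_len   = CMSG_LEN(sizeof(args));
memcpy(CMSG_DATA(cmsg), &args, sizeof(args));
sendmsg(fd, &msg, 0);		/* the kernel lands in rds_cmsg_atomic() */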
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
deleted file mode 100644
index 909c39835a5d..000000000000
--- a/net/rds/rdma.h
+++ /dev/null
@@ -1,85 +0,0 @@
1#ifndef _RDS_RDMA_H
2#define _RDS_RDMA_H
3
4#include <linux/rbtree.h>
5#include <linux/spinlock.h>
6#include <linux/scatterlist.h>
7
8#include "rds.h"
9
10struct rds_mr {
11 struct rb_node r_rb_node;
12 atomic_t r_refcount;
13 u32 r_key;
14
15 /* A copy of the creation flags */
16 unsigned int r_use_once:1;
17 unsigned int r_invalidate:1;
18 unsigned int r_write:1;
19
20 /* This is for RDS_MR_DEAD.
21 * It would be nice & consistent to make this part of the above
22 * bit field here, but we need to use test_and_set_bit.
23 */
24 unsigned long r_state;
25 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
26 struct rds_transport *r_trans;
27 void *r_trans_private;
28};
29
30/* Flags for mr->r_state */
31#define RDS_MR_DEAD 0
32
33struct rds_rdma_op {
34 u32 r_key;
35 u64 r_remote_addr;
36 unsigned int r_write:1;
37 unsigned int r_fence:1;
38 unsigned int r_notify:1;
39 unsigned int r_recverr:1;
40 unsigned int r_mapped:1;
41 struct rds_notifier *r_notifier;
42 unsigned int r_bytes;
43 unsigned int r_nents;
44 unsigned int r_count;
45 struct scatterlist r_sg[0];
46};
47
48static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
49{
50 return r_key | (((u64) offset) << 32);
51}
52
53static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
54{
55 return cookie;
56}
57
58static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
59{
60 return cookie >> 32;
61}
62
63int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
64int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
65int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
66void rds_rdma_drop_keys(struct rds_sock *rs);
67int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
68 struct cmsghdr *cmsg);
69int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
70 struct cmsghdr *cmsg);
71int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
72 struct cmsghdr *cmsg);
73int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
74 struct cmsghdr *cmsg);
75void rds_rdma_free_op(struct rds_rdma_op *ro);
76void rds_rdma_send_complete(struct rds_message *rm, int);
77
78extern void __rds_put_mr_final(struct rds_mr *mr);
79static inline void rds_mr_put(struct rds_mr *mr)
80{
81 if (atomic_dec_and_test(&mr->r_refcount))
82 __rds_put_mr_final(mr);
83}
84
85#endif
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index e599ba2f950d..4195a0539829 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -36,6 +36,34 @@
36 36
37static struct rdma_cm_id *rds_rdma_listen_id; 37static struct rdma_cm_id *rds_rdma_listen_id;
38 38
39static char *rds_cm_event_strings[] = {
40#define RDS_CM_EVENT_STRING(foo) \
41 [RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo)
42 RDS_CM_EVENT_STRING(ADDR_RESOLVED),
43 RDS_CM_EVENT_STRING(ADDR_ERROR),
44 RDS_CM_EVENT_STRING(ROUTE_RESOLVED),
45 RDS_CM_EVENT_STRING(ROUTE_ERROR),
46 RDS_CM_EVENT_STRING(CONNECT_REQUEST),
47 RDS_CM_EVENT_STRING(CONNECT_RESPONSE),
48 RDS_CM_EVENT_STRING(CONNECT_ERROR),
49 RDS_CM_EVENT_STRING(UNREACHABLE),
50 RDS_CM_EVENT_STRING(REJECTED),
51 RDS_CM_EVENT_STRING(ESTABLISHED),
52 RDS_CM_EVENT_STRING(DISCONNECTED),
53 RDS_CM_EVENT_STRING(DEVICE_REMOVAL),
54 RDS_CM_EVENT_STRING(MULTICAST_JOIN),
55 RDS_CM_EVENT_STRING(MULTICAST_ERROR),
56 RDS_CM_EVENT_STRING(ADDR_CHANGE),
57 RDS_CM_EVENT_STRING(TIMEWAIT_EXIT),
58#undef RDS_CM_EVENT_STRING
59};
60
61static char *rds_cm_event_str(enum rdma_cm_event_type type)
62{
63 return rds_str_array(rds_cm_event_strings,
64 ARRAY_SIZE(rds_cm_event_strings), type);
65};
66
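The RDS_CM_EVENT_STRING table relies on designated initializers, so any event value the list skips stays NULL; rds_str_array() (declared in the rds.h hunk below) is then expected to bounds-check and fall back. A plausible shape for that helper, sketched rather than copied from af_rds.c:

static char *example_str_array(char **array, size_t elements, size_t index)
{
	if (index < elements && array[index])
		return array[index];	/* known, named value */
	return "unknown";		/* hole in the table or out of range */
}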
39int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, 67int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
40 struct rdma_cm_event *event) 68 struct rdma_cm_event *event)
41{ 69{
@@ -44,8 +72,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
44 struct rds_transport *trans; 72 struct rds_transport *trans;
45 int ret = 0; 73 int ret = 0;
46 74
47 rdsdebug("conn %p id %p handling event %u\n", conn, cm_id, 75 rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
48 event->event); 76 event->event, rds_cm_event_str(event->event));
49 77
50 if (cm_id->device->node_type == RDMA_NODE_RNIC) 78 if (cm_id->device->node_type == RDMA_NODE_RNIC)
51 trans = &rds_iw_transport; 79 trans = &rds_iw_transport;
@@ -109,7 +137,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
109 137
110 default: 138 default:
111 /* things like device disconnect? */ 139 /* things like device disconnect? */
112 printk(KERN_ERR "RDS: unknown event %u!\n", event->event); 140 printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
141 event->event, rds_cm_event_str(event->event));
113 break; 142 break;
114 } 143 }
115 144
@@ -117,12 +146,13 @@ out:
117 if (conn) 146 if (conn)
118 mutex_unlock(&conn->c_cm_lock); 147 mutex_unlock(&conn->c_cm_lock);
119 148
120 rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret); 149 rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event,
150 rds_cm_event_str(event->event), ret);
121 151
122 return ret; 152 return ret;
123} 153}
124 154
125static int __init rds_rdma_listen_init(void) 155static int rds_rdma_listen_init(void)
126{ 156{
127 struct sockaddr_in sin; 157 struct sockaddr_in sin;
128 struct rdma_cm_id *cm_id; 158 struct rdma_cm_id *cm_id;
@@ -177,7 +207,7 @@ static void rds_rdma_listen_stop(void)
177 } 207 }
178} 208}
179 209
180int __init rds_rdma_init(void) 210static int rds_rdma_init(void)
181{ 211{
182 int ret; 212 int ret;
183 213
@@ -204,7 +234,7 @@ out:
204} 234}
205module_init(rds_rdma_init); 235module_init(rds_rdma_init);
206 236
207void rds_rdma_exit(void) 237static void rds_rdma_exit(void)
208{ 238{
209 /* stop listening first to ensure no new connections are attempted */ 239 /* stop listening first to ensure no new connections are attempted */
210 rds_rdma_listen_stop(); 240 rds_rdma_listen_stop();
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index 2f2c7d976c21..faba4e382695 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -11,10 +11,6 @@ int rds_rdma_conn_connect(struct rds_connection *conn);
11int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, 11int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
12 struct rdma_cm_event *event); 12 struct rdma_cm_event *event);
13 13
14/* from rdma_transport.c */
15int rds_rdma_init(void);
16void rds_rdma_exit(void);
17
18/* from ib.c */ 14/* from ib.c */
19extern struct rds_transport rds_ib_transport; 15extern struct rds_transport rds_ib_transport;
20int rds_ib_init(void); 16int rds_ib_init(void);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c224b5bb3ba9..9542449c0720 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -80,6 +80,7 @@ enum {
80/* Bits for c_flags */ 80/* Bits for c_flags */
81#define RDS_LL_SEND_FULL 0 81#define RDS_LL_SEND_FULL 0
82#define RDS_RECONNECT_PENDING 1 82#define RDS_RECONNECT_PENDING 1
83#define RDS_IN_XMIT 2
83 84
84struct rds_connection { 85struct rds_connection {
85 struct hlist_node c_hash_node; 86 struct hlist_node c_hash_node;
@@ -91,12 +92,13 @@ struct rds_connection {
91 struct rds_cong_map *c_lcong; 92 struct rds_cong_map *c_lcong;
92 struct rds_cong_map *c_fcong; 93 struct rds_cong_map *c_fcong;
93 94
94 struct mutex c_send_lock; /* protect send ring */
95 struct rds_message *c_xmit_rm; 95 struct rds_message *c_xmit_rm;
96 unsigned long c_xmit_sg; 96 unsigned long c_xmit_sg;
97 unsigned int c_xmit_hdr_off; 97 unsigned int c_xmit_hdr_off;
98 unsigned int c_xmit_data_off; 98 unsigned int c_xmit_data_off;
99 unsigned int c_xmit_atomic_sent;
99 unsigned int c_xmit_rdma_sent; 100 unsigned int c_xmit_rdma_sent;
101 unsigned int c_xmit_data_sent;
100 102
101 spinlock_t c_lock; /* protect msg queues */ 103 spinlock_t c_lock; /* protect msg queues */
102 u64 c_next_tx_seq; 104 u64 c_next_tx_seq;
@@ -116,11 +118,10 @@ struct rds_connection {
116 struct delayed_work c_conn_w; 118 struct delayed_work c_conn_w;
117 struct work_struct c_down_w; 119 struct work_struct c_down_w;
118 struct mutex c_cm_lock; /* protect conn state & cm */ 120 struct mutex c_cm_lock; /* protect conn state & cm */
121 wait_queue_head_t c_waitq;
119 122
120 struct list_head c_map_item; 123 struct list_head c_map_item;
121 unsigned long c_map_queued; 124 unsigned long c_map_queued;
122 unsigned long c_map_offset;
123 unsigned long c_map_bytes;
124 125
125 unsigned int c_unacked_packets; 126 unsigned int c_unacked_packets;
126 unsigned int c_unacked_bytes; 127 unsigned int c_unacked_bytes;
@@ -206,6 +207,48 @@ struct rds_incoming {
206 rds_rdma_cookie_t i_rdma_cookie; 207 rds_rdma_cookie_t i_rdma_cookie;
207}; 208};
208 209
210struct rds_mr {
211 struct rb_node r_rb_node;
212 atomic_t r_refcount;
213 u32 r_key;
214
215 /* A copy of the creation flags */
216 unsigned int r_use_once:1;
217 unsigned int r_invalidate:1;
218 unsigned int r_write:1;
219
220 /* This is for RDS_MR_DEAD.
221 * It would be nice & consistent to make this part of the above
222 * bit field here, but we need to use test_and_set_bit.
223 */
224 unsigned long r_state;
225 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
226 struct rds_transport *r_trans;
227 void *r_trans_private;
228};
229
230/* Flags for mr->r_state */
231#define RDS_MR_DEAD 0
232
233static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
234{
235 return r_key | (((u64) offset) << 32);
236}
237
238static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
239{
240 return cookie;
241}
242
243static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
244{
245 return cookie >> 32;
246}
247
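The cookie helpers pack the MR key into the low 32 bits and the offset into the high 32, so a round-trip is loss-free:

rds_rdma_cookie_t cookie = rds_rdma_make_cookie(0x1234abcd, 4096);

BUG_ON(rds_rdma_cookie_key(cookie) != 0x1234abcd);	/* low 32 bits */
BUG_ON(rds_rdma_cookie_offset(cookie) != 4096);		/* high 32 bits */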
248/* atomic operation types */
249#define RDS_ATOMIC_TYPE_CSWP 0
250#define RDS_ATOMIC_TYPE_FADD 1
251
209/* 252/*
210 * m_sock_item and m_conn_item are on lists that are serialized under 253 * m_sock_item and m_conn_item are on lists that are serialized under
211 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 254 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
@@ -258,13 +301,71 @@ struct rds_message {
258 * -> rs->rs_lock 301 * -> rs->rs_lock
259 */ 302 */
260 spinlock_t m_rs_lock; 303 spinlock_t m_rs_lock;
304 wait_queue_head_t m_flush_wait;
305
261 struct rds_sock *m_rs; 306 struct rds_sock *m_rs;
262 struct rds_rdma_op *m_rdma_op; 307
308 /* cookie to send to remote, in rds header */
263 rds_rdma_cookie_t m_rdma_cookie; 309 rds_rdma_cookie_t m_rdma_cookie;
264 struct rds_mr *m_rdma_mr; 310
265 unsigned int m_nents; 311 unsigned int m_used_sgs;
266 unsigned int m_count; 312 unsigned int m_total_sgs;
267 struct scatterlist m_sg[0]; 313
314 void *m_final_op;
315
316 struct {
317 struct rm_atomic_op {
318 int op_type;
319 union {
320 struct {
321 uint64_t compare;
322 uint64_t swap;
323 uint64_t compare_mask;
324 uint64_t swap_mask;
325 } op_m_cswp;
326 struct {
327 uint64_t add;
328 uint64_t nocarry_mask;
329 } op_m_fadd;
330 };
331
332 u32 op_rkey;
333 u64 op_remote_addr;
334 unsigned int op_notify:1;
335 unsigned int op_recverr:1;
336 unsigned int op_mapped:1;
337 unsigned int op_silent:1;
338 unsigned int op_active:1;
339 struct scatterlist *op_sg;
340 struct rds_notifier *op_notifier;
341
342 struct rds_mr *op_rdma_mr;
343 } atomic;
344 struct rm_rdma_op {
345 u32 op_rkey;
346 u64 op_remote_addr;
347 unsigned int op_write:1;
348 unsigned int op_fence:1;
349 unsigned int op_notify:1;
350 unsigned int op_recverr:1;
351 unsigned int op_mapped:1;
352 unsigned int op_silent:1;
353 unsigned int op_active:1;
354 unsigned int op_bytes;
355 unsigned int op_nents;
356 unsigned int op_count;
357 struct scatterlist *op_sg;
358 struct rds_notifier *op_notifier;
359
360 struct rds_mr *op_rdma_mr;
361 } rdma;
362 struct rm_data_op {
363 unsigned int op_active:1;
364 unsigned int op_nents;
365 unsigned int op_count;
366 struct scatterlist *op_sg;
367 } data;
368 };
268}; 369};
269 370
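With the old trailing m_sg[0] array gone, a message now names its payloads explicitly, and the op_active bits (rather than pointer checks) say which ones are present. A sketch of how a caller might dispatch on the new layout (the pr_debug lines are illustrative, not from this patch):

static void example_dispatch(struct rds_message *rm)
{
	if (rm->atomic.op_active)
		pr_debug("atomic %s, rkey %u\n",
			 rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP ?
				"cswp" : "fadd", rm->atomic.op_rkey);
	if (rm->rdma.op_active)
		pr_debug("rdma %s, %u bytes\n",
			 rm->rdma.op_write ? "write" : "read",
			 rm->rdma.op_bytes);
	if (rm->data.op_active)
		pr_debug("data, %u sg entries\n", rm->data.op_nents);
}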
270/* 371/*
@@ -305,10 +406,6 @@ struct rds_notifier {
305 * transport is responsible for other serialization, including 406 * transport is responsible for other serialization, including
306 * rds_recv_incoming(). This is called in process context but 407 * rds_recv_incoming(). This is called in process context but
307 * should try hard not to block. 408 * should try hard not to block.
308 *
309 * @xmit_cong_map: This asks the transport to send the local bitmap down the
310 * given connection. XXX get a better story about the bitmap
311 * flag and header.
312 */ 409 */
313 410
314#define RDS_TRANS_IB 0 411#define RDS_TRANS_IB 0
@@ -332,13 +429,11 @@ struct rds_transport {
332 void (*xmit_complete)(struct rds_connection *conn); 429 void (*xmit_complete)(struct rds_connection *conn);
333 int (*xmit)(struct rds_connection *conn, struct rds_message *rm, 430 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
334 unsigned int hdr_off, unsigned int sg, unsigned int off); 431 unsigned int hdr_off, unsigned int sg, unsigned int off);
335 int (*xmit_cong_map)(struct rds_connection *conn, 432 int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
336 struct rds_cong_map *map, unsigned long offset); 433 int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
337 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
338 int (*recv)(struct rds_connection *conn); 434 int (*recv)(struct rds_connection *conn);
339 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, 435 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
340 size_t size); 436 size_t size);
341 void (*inc_purge)(struct rds_incoming *inc);
342 void (*inc_free)(struct rds_incoming *inc); 437 void (*inc_free)(struct rds_incoming *inc);
343 438
344 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 439 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
@@ -367,17 +462,11 @@ struct rds_sock {
367 * bound_addr used for both incoming and outgoing, no INADDR_ANY 462 * bound_addr used for both incoming and outgoing, no INADDR_ANY
368 * support. 463 * support.
369 */ 464 */
370 struct rb_node rs_bound_node; 465 struct hlist_node rs_bound_node;
371 __be32 rs_bound_addr; 466 __be32 rs_bound_addr;
372 __be32 rs_conn_addr; 467 __be32 rs_conn_addr;
373 __be16 rs_bound_port; 468 __be16 rs_bound_port;
374 __be16 rs_conn_port; 469 __be16 rs_conn_port;
375
376 /*
377 * This is only used to communicate the transport between bind and
378 * initiating connections. All other trans use is referenced through
379 * the connection.
380 */
381 struct rds_transport *rs_transport; 470 struct rds_transport *rs_transport;
382 471
383 /* 472 /*
@@ -466,8 +555,8 @@ struct rds_statistics {
466 uint64_t s_recv_ping; 555 uint64_t s_recv_ping;
467 uint64_t s_send_queue_empty; 556 uint64_t s_send_queue_empty;
468 uint64_t s_send_queue_full; 557 uint64_t s_send_queue_full;
469 uint64_t s_send_sem_contention; 558 uint64_t s_send_lock_contention;
470 uint64_t s_send_sem_queue_raced; 559 uint64_t s_send_lock_queue_raced;
471 uint64_t s_send_immediate_retry; 560 uint64_t s_send_immediate_retry;
472 uint64_t s_send_delayed_retry; 561 uint64_t s_send_delayed_retry;
473 uint64_t s_send_drop_acked; 562 uint64_t s_send_drop_acked;
@@ -487,6 +576,7 @@ struct rds_statistics {
487}; 576};
488 577
489/* af_rds.c */ 578/* af_rds.c */
579char *rds_str_array(char **array, size_t elements, size_t index);
490void rds_sock_addref(struct rds_sock *rs); 580void rds_sock_addref(struct rds_sock *rs);
491void rds_sock_put(struct rds_sock *rs); 581void rds_sock_put(struct rds_sock *rs);
492void rds_wake_sk_sleep(struct rds_sock *rs); 582void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -521,15 +611,16 @@ void rds_cong_exit(void);
521struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 611struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
522 612
523/* conn.c */ 613/* conn.c */
524int __init rds_conn_init(void); 614int rds_conn_init(void);
525void rds_conn_exit(void); 615void rds_conn_exit(void);
526struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, 616struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
527 struct rds_transport *trans, gfp_t gfp); 617 struct rds_transport *trans, gfp_t gfp);
528struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, 618struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
529 struct rds_transport *trans, gfp_t gfp); 619 struct rds_transport *trans, gfp_t gfp);
620void rds_conn_shutdown(struct rds_connection *conn);
530void rds_conn_destroy(struct rds_connection *conn); 621void rds_conn_destroy(struct rds_connection *conn);
531void rds_conn_reset(struct rds_connection *conn);
532void rds_conn_drop(struct rds_connection *conn); 622void rds_conn_drop(struct rds_connection *conn);
623void rds_conn_connect_if_down(struct rds_connection *conn);
533void rds_for_each_conn_info(struct socket *sock, unsigned int len, 624void rds_for_each_conn_info(struct socket *sock, unsigned int len,
534 struct rds_info_iterator *iter, 625 struct rds_info_iterator *iter,
535 struct rds_info_lengths *lens, 626 struct rds_info_lengths *lens,
@@ -566,7 +657,8 @@ rds_conn_connecting(struct rds_connection *conn)
566 657
567/* message.c */ 658/* message.c */
568struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); 659struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
569struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 660struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
661int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
570 size_t total_len); 662 size_t total_len);
571struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); 663struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
572void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 664void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
@@ -575,12 +667,9 @@ int rds_message_add_extension(struct rds_header *hdr,
575 unsigned int type, const void *data, unsigned int len); 667 unsigned int type, const void *data, unsigned int len);
576int rds_message_next_extension(struct rds_header *hdr, 668int rds_message_next_extension(struct rds_header *hdr,
577 unsigned int *pos, void *buf, unsigned int *buflen); 669 unsigned int *pos, void *buf, unsigned int *buflen);
578int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version);
579int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version);
580int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); 670int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
581int rds_message_inc_copy_to_user(struct rds_incoming *inc, 671int rds_message_inc_copy_to_user(struct rds_incoming *inc,
582 struct iovec *first_iov, size_t size); 672 struct iovec *first_iov, size_t size);
583void rds_message_inc_purge(struct rds_incoming *inc);
584void rds_message_inc_free(struct rds_incoming *inc); 673void rds_message_inc_free(struct rds_incoming *inc);
585void rds_message_addref(struct rds_message *rm); 674void rds_message_addref(struct rds_message *rm);
586void rds_message_put(struct rds_message *rm); 675void rds_message_put(struct rds_message *rm);
@@ -614,7 +703,6 @@ void rds_page_exit(void);
614/* recv.c */ 703/* recv.c */
615void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 704void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
616 __be32 saddr); 705 __be32 saddr);
617void rds_inc_addref(struct rds_incoming *inc);
618void rds_inc_put(struct rds_incoming *inc); 706void rds_inc_put(struct rds_incoming *inc);
619void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, 707void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
620 struct rds_incoming *inc, gfp_t gfp, enum km_type km); 708 struct rds_incoming *inc, gfp_t gfp, enum km_type km);
@@ -636,14 +724,38 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
636typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); 724typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
637void rds_send_drop_acked(struct rds_connection *conn, u64 ack, 725void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
638 is_acked_func is_acked); 726 is_acked_func is_acked);
639int rds_send_acked_before(struct rds_connection *conn, u64 seq);
640void rds_send_remove_from_sock(struct list_head *messages, int status);
641int rds_send_pong(struct rds_connection *conn, __be16 dport); 727int rds_send_pong(struct rds_connection *conn, __be16 dport);
642struct rds_message *rds_send_get_message(struct rds_connection *, 728struct rds_message *rds_send_get_message(struct rds_connection *,
643 struct rds_rdma_op *); 729 struct rm_rdma_op *);
644 730
645/* rdma.c */ 731/* rdma.c */
646void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); 732void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
733int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
734int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
735int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
736void rds_rdma_drop_keys(struct rds_sock *rs);
737int rds_rdma_extra_size(struct rds_rdma_args *args);
738int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
739 struct cmsghdr *cmsg);
740int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
741 struct cmsghdr *cmsg);
742int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
743 struct cmsghdr *cmsg);
744int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
745 struct cmsghdr *cmsg);
746void rds_rdma_free_op(struct rm_rdma_op *ro);
747void rds_atomic_free_op(struct rm_atomic_op *ao);
748void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
749void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
750int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
751 struct cmsghdr *cmsg);
752
753extern void __rds_put_mr_final(struct rds_mr *mr);
754static inline void rds_mr_put(struct rds_mr *mr)
755{
756 if (atomic_dec_and_test(&mr->r_refcount))
757 __rds_put_mr_final(mr);
758}
647 759
648/* stats.c */ 760/* stats.c */
649DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); 761DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
@@ -657,14 +769,14 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
657 put_cpu(); \ 769 put_cpu(); \
658} while (0) 770} while (0)
659#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) 771#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
660int __init rds_stats_init(void); 772int rds_stats_init(void);
661void rds_stats_exit(void); 773void rds_stats_exit(void);
662void rds_stats_info_copy(struct rds_info_iterator *iter, 774void rds_stats_info_copy(struct rds_info_iterator *iter,
663 uint64_t *values, const char *const *names, 775 uint64_t *values, const char *const *names,
664 size_t nr); 776 size_t nr);
665 777
666/* sysctl.c */ 778/* sysctl.c */
667int __init rds_sysctl_init(void); 779int rds_sysctl_init(void);
668void rds_sysctl_exit(void); 780void rds_sysctl_exit(void);
669extern unsigned long rds_sysctl_sndbuf_min; 781extern unsigned long rds_sysctl_sndbuf_min;
670extern unsigned long rds_sysctl_sndbuf_default; 782extern unsigned long rds_sysctl_sndbuf_default;
@@ -678,9 +790,10 @@ extern unsigned long rds_sysctl_trace_flags;
678extern unsigned int rds_sysctl_trace_level; 790extern unsigned int rds_sysctl_trace_level;
679 791
680/* threads.c */ 792/* threads.c */
681int __init rds_threads_init(void); 793int rds_threads_init(void);
682void rds_threads_exit(void); 794void rds_threads_exit(void);
683extern struct workqueue_struct *rds_wq; 795extern struct workqueue_struct *rds_wq;
796void rds_queue_reconnect(struct rds_connection *conn);
684void rds_connect_worker(struct work_struct *); 797void rds_connect_worker(struct work_struct *);
685void rds_shutdown_worker(struct work_struct *); 798void rds_shutdown_worker(struct work_struct *);
686void rds_send_worker(struct work_struct *); 799void rds_send_worker(struct work_struct *);
@@ -691,9 +804,10 @@ void rds_connect_complete(struct rds_connection *conn);
691int rds_trans_register(struct rds_transport *trans); 804int rds_trans_register(struct rds_transport *trans);
692void rds_trans_unregister(struct rds_transport *trans); 805void rds_trans_unregister(struct rds_transport *trans);
693struct rds_transport *rds_trans_get_preferred(__be32 addr); 806struct rds_transport *rds_trans_get_preferred(__be32 addr);
807void rds_trans_put(struct rds_transport *trans);
694unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, 808unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
695 unsigned int avail); 809 unsigned int avail);
696int __init rds_trans_init(void); 810int rds_trans_init(void);
697void rds_trans_exit(void); 811void rds_trans_exit(void);
698 812
699#endif 813#endif
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c93588c2d553..596689e59272 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -36,7 +36,6 @@
36#include <linux/in.h> 36#include <linux/in.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40 39
41void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 40void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
42 __be32 saddr) 41 __be32 saddr)
@@ -49,12 +48,11 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
49} 48}
50EXPORT_SYMBOL_GPL(rds_inc_init); 49EXPORT_SYMBOL_GPL(rds_inc_init);
51 50
52void rds_inc_addref(struct rds_incoming *inc) 51static void rds_inc_addref(struct rds_incoming *inc)
53{ 52{
54 rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); 53 rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
55 atomic_inc(&inc->i_refcount); 54 atomic_inc(&inc->i_refcount);
56} 55}
57EXPORT_SYMBOL_GPL(rds_inc_addref);
58 56
59void rds_inc_put(struct rds_incoming *inc) 57void rds_inc_put(struct rds_incoming *inc)
60{ 58{
@@ -210,7 +208,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
210 } 208 }
211 209
212 rs = rds_find_bound(daddr, inc->i_hdr.h_dport); 210 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
213 if (rs == NULL) { 211 if (!rs) {
214 rds_stats_inc(s_recv_drop_no_sock); 212 rds_stats_inc(s_recv_drop_no_sock);
215 goto out; 213 goto out;
216 } 214 }
@@ -251,7 +249,7 @@ static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc)
251{ 249{
252 unsigned long flags; 250 unsigned long flags;
253 251
254 if (*inc == NULL) { 252 if (!*inc) {
255 read_lock_irqsave(&rs->rs_recv_lock, flags); 253 read_lock_irqsave(&rs->rs_recv_lock, flags);
256 if (!list_empty(&rs->rs_recv_queue)) { 254 if (!list_empty(&rs->rs_recv_queue)) {
257 *inc = list_entry(rs->rs_recv_queue.next, 255 *inc = list_entry(rs->rs_recv_queue.next,
@@ -334,10 +332,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
334 332
335 if (msghdr) { 333 if (msghdr) {
336 cmsg.user_token = notifier->n_user_token; 334 cmsg.user_token = notifier->n_user_token;
337 cmsg.status = notifier->n_status; 335 cmsg.status = notifier->n_status;
338 336
339 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, 337 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS,
340 sizeof(cmsg), &cmsg); 338 sizeof(cmsg), &cmsg);
341 if (err) 339 if (err)
342 break; 340 break;
343 } 341 }
diff --git a/net/rds/send.c b/net/rds/send.c
index 9c1c6bcaa6c9..35b9c2e9caf1 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -37,7 +37,6 @@
37#include <linux/list.h> 37#include <linux/list.h>
38 38
39#include "rds.h" 39#include "rds.h"
40#include "rdma.h"
41 40
42/* When transmitting messages in rds_send_xmit, we need to emerge from 41/* When transmitting messages in rds_send_xmit, we need to emerge from
43 * time to time and briefly release the CPU. Otherwise the softlock watchdog 42 * time to time and briefly release the CPU. Otherwise the softlock watchdog
@@ -53,8 +52,11 @@ static int send_batch_count = 64;
53module_param(send_batch_count, int, 0444); 52module_param(send_batch_count, int, 0444);
54MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); 53MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
55 54
55static void rds_send_remove_from_sock(struct list_head *messages, int status);
56
56/* 57/*
57 * Reset the send state. Caller must hold c_send_lock when calling here. 58 * Reset the send state. Callers must ensure that this doesn't race with
59 * rds_send_xmit().
58 */ 60 */
59void rds_send_reset(struct rds_connection *conn) 61void rds_send_reset(struct rds_connection *conn)
60{ 62{
@@ -62,18 +64,22 @@ void rds_send_reset(struct rds_connection *conn)
62 unsigned long flags; 64 unsigned long flags;
63 65
64 if (conn->c_xmit_rm) { 66 if (conn->c_xmit_rm) {
67 rm = conn->c_xmit_rm;
68 conn->c_xmit_rm = NULL;
65 /* Tell the user the RDMA op is no longer mapped by the 69 /* Tell the user the RDMA op is no longer mapped by the
66 * transport. This isn't entirely true (it's flushed out 70 * transport. This isn't entirely true (it's flushed out
67 * independently) but as the connection is down, there's 71 * independently) but as the connection is down, there's
68 * no ongoing RDMA to/from that memory */ 72 * no ongoing RDMA to/from that memory */
69 rds_message_unmapped(conn->c_xmit_rm); 73 rds_message_unmapped(rm);
70 rds_message_put(conn->c_xmit_rm); 74 rds_message_put(rm);
71 conn->c_xmit_rm = NULL;
72 } 75 }
76
73 conn->c_xmit_sg = 0; 77 conn->c_xmit_sg = 0;
74 conn->c_xmit_hdr_off = 0; 78 conn->c_xmit_hdr_off = 0;
75 conn->c_xmit_data_off = 0; 79 conn->c_xmit_data_off = 0;
80 conn->c_xmit_atomic_sent = 0;
76 conn->c_xmit_rdma_sent = 0; 81 conn->c_xmit_rdma_sent = 0;
82 conn->c_xmit_data_sent = 0;
77 83
78 conn->c_map_queued = 0; 84 conn->c_map_queued = 0;
79 85
@@ -90,6 +96,25 @@ void rds_send_reset(struct rds_connection *conn)
90 spin_unlock_irqrestore(&conn->c_lock, flags); 96 spin_unlock_irqrestore(&conn->c_lock, flags);
91} 97}
92 98
99static int acquire_in_xmit(struct rds_connection *conn)
100{
101 return test_and_set_bit(RDS_IN_XMIT, &conn->c_flags) == 0;
102}
103
104static void release_in_xmit(struct rds_connection *conn)
105{
106 clear_bit(RDS_IN_XMIT, &conn->c_flags);
107 smp_mb__after_clear_bit();
108 /*
109 * We don't use wait_on_bit()/wake_up_bit() because our waking is in a
110 * hot path and finding waiters is very rare. We don't want to walk
111 * the system-wide hashed waitqueue buckets in the fast path only to
112 * almost never find waiters.
113 */
114 if (waitqueue_active(&conn->c_waitq))
115 wake_up_all(&conn->c_waitq);
116}
117
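acquire_in_xmit()/release_in_xmit() replace the old c_send_lock mutex with a single flag bit: a lock-free trylock on the way in, and a barrier plus a cheap waitqueue_active() test on the way out. The same idiom in isolation (a sketch, not RDS code):

static int example_try_xmit(unsigned long *flags, wait_queue_head_t *waitq)
{
	if (test_and_set_bit(RDS_IN_XMIT, flags))
		return 0;		/* another task is transmitting */

	/* ... push queued messages at the transport ... */

	clear_bit(RDS_IN_XMIT, flags);
	smp_mb__after_clear_bit();	/* order the clear vs. the waiter test */
	if (waitqueue_active(waitq))	/* skip the wakeup in the common case */
		wake_up_all(waitq);
	return 1;
}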
93/* 118/*
94 * We're making the conscious trade-off here to only send one message 119
95 * down the connection at a time. 120 * down the connection at a time.
@@ -109,102 +134,69 @@ int rds_send_xmit(struct rds_connection *conn)
109 struct rds_message *rm; 134 struct rds_message *rm;
110 unsigned long flags; 135 unsigned long flags;
111 unsigned int tmp; 136 unsigned int tmp;
112 unsigned int send_quota = send_batch_count;
113 struct scatterlist *sg; 137 struct scatterlist *sg;
114 int ret = 0; 138 int ret = 0;
115 int was_empty = 0;
116 LIST_HEAD(to_be_dropped); 139 LIST_HEAD(to_be_dropped);
117 140
141restart:
142
118 /* 143 /*
119 * sendmsg calls here after having queued its message on the send 144 * sendmsg calls here after having queued its message on the send
120 * queue. We only have one task feeding the connection at a time. If 145 * queue. We only have one task feeding the connection at a time. If
121 * another thread is already feeding the queue then we back off. This 146 * another thread is already feeding the queue then we back off. This
122 * avoids blocking the caller and trading per-connection data between 147 * avoids blocking the caller and trading per-connection data between
123 * caches per message. 148 * caches per message.
124 *
125 * The sem holder will issue a retry if they notice that someone queued
126 * a message after they stopped walking the send queue but before they
127 * dropped the sem.
128 */ 149 */
129 if (!mutex_trylock(&conn->c_send_lock)) { 150 if (!acquire_in_xmit(conn)) {
130 rds_stats_inc(s_send_sem_contention); 151 rds_stats_inc(s_send_lock_contention);
131 ret = -ENOMEM; 152 ret = -ENOMEM;
132 goto out; 153 goto out;
133 } 154 }
134 155
156 /*
157 * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
158 * we do the opposite to avoid races.
159 */
160 if (!rds_conn_up(conn)) {
161 release_in_xmit(conn);
162 ret = 0;
163 goto out;
164 }
165
135 if (conn->c_trans->xmit_prepare) 166 if (conn->c_trans->xmit_prepare)
136 conn->c_trans->xmit_prepare(conn); 167 conn->c_trans->xmit_prepare(conn);
137 168
138 /* 169 /*
139 * spin trying to push headers and data down the connection until 170 * spin trying to push headers and data down the connection until
140 * the connection doens't make forward progress. 171 * the connection doesn't make forward progress.
141 */ 172 */
142 while (--send_quota) { 173 while (1) {
143 /*
144 * See if need to send a congestion map update if we're
145 * between sending messages. The send_sem protects our sole
146 * use of c_map_offset and _bytes.
147 * Note this is used only by transports that define a special
148 * xmit_cong_map function. For all others, we create allocate
149 * a cong_map message and treat it just like any other send.
150 */
151 if (conn->c_map_bytes) {
152 ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
153 conn->c_map_offset);
154 if (ret <= 0)
155 break;
156 174
157 conn->c_map_offset += ret;
158 conn->c_map_bytes -= ret;
159 if (conn->c_map_bytes)
160 continue;
161 }
162
163 /* If we're done sending the current message, clear the
164 * offset and S/G temporaries.
165 */
166 rm = conn->c_xmit_rm; 175 rm = conn->c_xmit_rm;
167 if (rm != NULL &&
168 conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
169 conn->c_xmit_sg == rm->m_nents) {
170 conn->c_xmit_rm = NULL;
171 conn->c_xmit_sg = 0;
172 conn->c_xmit_hdr_off = 0;
173 conn->c_xmit_data_off = 0;
174 conn->c_xmit_rdma_sent = 0;
175
176 /* Release the reference to the previous message. */
177 rds_message_put(rm);
178 rm = NULL;
179 }
180 176
181 /* If we're asked to send a cong map update, do so. 177 /*
178 * If between sending messages, we can send a pending congestion
179 * map update.
182 */ 180 */
183 if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) { 181 if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) {
184 if (conn->c_trans->xmit_cong_map != NULL) {
185 conn->c_map_offset = 0;
186 conn->c_map_bytes = sizeof(struct rds_header) +
187 RDS_CONG_MAP_BYTES;
188 continue;
189 }
190
191 rm = rds_cong_update_alloc(conn); 182 rm = rds_cong_update_alloc(conn);
192 if (IS_ERR(rm)) { 183 if (IS_ERR(rm)) {
193 ret = PTR_ERR(rm); 184 ret = PTR_ERR(rm);
194 break; 185 break;
195 } 186 }
187 rm->data.op_active = 1;
196 188
197 conn->c_xmit_rm = rm; 189 conn->c_xmit_rm = rm;
198 } 190 }
199 191
200 /* 192 /*
201 * Grab the next message from the send queue, if there is one. 193 * If not already working on one, grab the next message.
202 * 194 *
203 * c_xmit_rm holds a ref while we're sending this message down 195 * c_xmit_rm holds a ref while we're sending this message down
204 * the connection. We can use this ref while holding the 196
205 * send_sem. rds_send_reset() is serialized with it. 197
206 */ 198 */
207 if (rm == NULL) { 199 if (!rm) {
208 unsigned int len; 200 unsigned int len;
209 201
210 spin_lock_irqsave(&conn->c_lock, flags); 202 spin_lock_irqsave(&conn->c_lock, flags);
@@ -224,10 +216,8 @@ int rds_send_xmit(struct rds_connection *conn)
224 216
225 spin_unlock_irqrestore(&conn->c_lock, flags); 217 spin_unlock_irqrestore(&conn->c_lock, flags);
226 218
227 if (rm == NULL) { 219 if (!rm)
228 was_empty = 1;
229 break; 220 break;
230 }
231 221
232 /* Unfortunately, the way Infiniband deals with 222 /* Unfortunately, the way Infiniband deals with
233 * RDMA to a bad MR key is by moving the entire 223 * RDMA to a bad MR key is by moving the entire
@@ -236,13 +226,12 @@ int rds_send_xmit(struct rds_connection *conn)
236 * connection. 226 * connection.
237 * Therefore, we never retransmit messages with RDMA ops. 227 * Therefore, we never retransmit messages with RDMA ops.
238 */ 228 */
239 if (rm->m_rdma_op && 229 if (rm->rdma.op_active &&
240 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { 230 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
241 spin_lock_irqsave(&conn->c_lock, flags); 231 spin_lock_irqsave(&conn->c_lock, flags);
242 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 232 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
243 list_move(&rm->m_conn_item, &to_be_dropped); 233 list_move(&rm->m_conn_item, &to_be_dropped);
244 spin_unlock_irqrestore(&conn->c_lock, flags); 234 spin_unlock_irqrestore(&conn->c_lock, flags);
245 rds_message_put(rm);
246 continue; 235 continue;
247 } 236 }
248 237
@@ -263,23 +252,55 @@ int rds_send_xmit(struct rds_connection *conn)
263 conn->c_xmit_rm = rm; 252 conn->c_xmit_rm = rm;
264 } 253 }
265 254
266 /* 255 /* The transport either sends the whole rdma or none of it */
267 * Try and send an rdma message. Let's see if we can 256 if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
268 * keep this simple and require that the transport either 257 rm->m_final_op = &rm->rdma;
269 * send the whole rdma or none of it. 258 ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
270 */
271 if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
272 ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
273 if (ret) 259 if (ret)
274 break; 260 break;
275 conn->c_xmit_rdma_sent = 1; 261 conn->c_xmit_rdma_sent = 1;
262
263 /* The transport owns the mapped memory for now.
264 * You can't unmap it while it's on the send queue */
265 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
266 }
267
268 if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
269 rm->m_final_op = &rm->atomic;
270 ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
271 if (ret)
272 break;
273 conn->c_xmit_atomic_sent = 1;
274
276 /* The transport owns the mapped memory for now. 275 /* The transport owns the mapped memory for now.
277 * You can't unmap it while it's on the send queue */ 276 * You can't unmap it while it's on the send queue */
278 set_bit(RDS_MSG_MAPPED, &rm->m_flags); 277 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
279 } 278 }
280 279
281 if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || 280 /*
282 conn->c_xmit_sg < rm->m_nents) { 281 * A number of cases require an RDS header to be sent
282 * even if there is no data.
283 * We permit 0-byte sends; rds-ping depends on this.
284 * However, if there are exclusively attached silent ops,
285 * we skip the hdr/data send, to enable silent operation.
286 */
287 if (rm->data.op_nents == 0) {
288 int ops_present;
289 int all_ops_are_silent = 1;
290
291 ops_present = (rm->atomic.op_active || rm->rdma.op_active);
292 if (rm->atomic.op_active && !rm->atomic.op_silent)
293 all_ops_are_silent = 0;
294 if (rm->rdma.op_active && !rm->rdma.op_silent)
295 all_ops_are_silent = 0;
296
297 if (ops_present && all_ops_are_silent
298 && !rm->m_rdma_cookie)
299 rm->data.op_active = 0;
300 }
301
302 if (rm->data.op_active && !conn->c_xmit_data_sent) {
303 rm->m_final_op = &rm->data;
283 ret = conn->c_trans->xmit(conn, rm, 304 ret = conn->c_trans->xmit(conn, rm,
284 conn->c_xmit_hdr_off, 305 conn->c_xmit_hdr_off,
285 conn->c_xmit_sg, 306 conn->c_xmit_sg,
@@ -295,7 +316,7 @@ int rds_send_xmit(struct rds_connection *conn)
295 ret -= tmp; 316 ret -= tmp;
296 } 317 }
297 318
298 sg = &rm->m_sg[conn->c_xmit_sg]; 319 sg = &rm->data.op_sg[conn->c_xmit_sg];
299 while (ret) { 320 while (ret) {
300 tmp = min_t(int, ret, sg->length - 321 tmp = min_t(int, ret, sg->length -
301 conn->c_xmit_data_off); 322 conn->c_xmit_data_off);
@@ -306,49 +327,63 @@ int rds_send_xmit(struct rds_connection *conn)
306 sg++; 327 sg++;
307 conn->c_xmit_sg++; 328 conn->c_xmit_sg++;
308 BUG_ON(ret != 0 && 329 BUG_ON(ret != 0 &&
309 conn->c_xmit_sg == rm->m_nents); 330 conn->c_xmit_sg == rm->data.op_nents);
310 } 331 }
311 } 332 }
333
334 if (conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
335 (conn->c_xmit_sg == rm->data.op_nents))
336 conn->c_xmit_data_sent = 1;
312 } 337 }
313 }
314 338
315 /* Nuke any messages we decided not to retransmit. */ 339 /*
316 if (!list_empty(&to_be_dropped)) 340 * A rm will only take multiple times through this loop
317 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); 341 * if there is a data op. Thus, if the data is sent (or there was
342 * none), then we're done with the rm.
343 */
344 if (!rm->data.op_active || conn->c_xmit_data_sent) {
345 conn->c_xmit_rm = NULL;
346 conn->c_xmit_sg = 0;
347 conn->c_xmit_hdr_off = 0;
348 conn->c_xmit_data_off = 0;
349 conn->c_xmit_rdma_sent = 0;
350 conn->c_xmit_atomic_sent = 0;
351 conn->c_xmit_data_sent = 0;
352
353 rds_message_put(rm);
354 }
355 }
318 356
319 if (conn->c_trans->xmit_complete) 357 if (conn->c_trans->xmit_complete)
320 conn->c_trans->xmit_complete(conn); 358 conn->c_trans->xmit_complete(conn);
321 359
322 /* 360 release_in_xmit(conn);
323 * We might be racing with another sender who queued a message but
324 * backed off on noticing that we held the c_send_lock. If we check
325 * for queued messages after dropping the sem then either we'll
326 * see the queued message or the queuer will get the sem. If we
327 * notice the queued message then we trigger an immediate retry.
328 *
329 * We need to be careful only to do this when we stopped processing
330 * the send queue because it was empty. It's the only way we
331 * stop processing the loop when the transport hasn't taken
332 * responsibility for forward progress.
333 */
334 mutex_unlock(&conn->c_send_lock);
335 361
336 if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) { 362 /* Nuke any messages we decided not to retransmit. */
337 /* We exhausted the send quota, but there's work left to 363 if (!list_empty(&to_be_dropped)) {
338 * do. Return and (re-)schedule the send worker. 364 /* irqs on here, so we can put(), unlike above */
339 */ 365 list_for_each_entry(rm, &to_be_dropped, m_conn_item)
340 ret = -EAGAIN; 366 rds_message_put(rm);
367 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
341 } 368 }
342 369
343 if (ret == 0 && was_empty) { 370 /*
344 /* A simple bit test would be way faster than taking the 371 * Other senders can queue a message after we last test the send queue
345 * spin lock */ 372 * but before we clear RDS_IN_XMIT. In that case they'd back off and
346 spin_lock_irqsave(&conn->c_lock, flags); 373 * not try to send their newly queued message. We need to check the
374 * send queue after having cleared RDS_IN_XMIT so that their message
375 * doesn't get stuck on the send queue.
376 *
377 * If the transport cannot continue (i.e ret != 0), then it must
378 * call us when more room is available, such as from the tx
379 * completion handler.
380 */
381 if (ret == 0) {
382 smp_mb();
347 if (!list_empty(&conn->c_send_queue)) { 383 if (!list_empty(&conn->c_send_queue)) {
348 rds_stats_inc(s_send_sem_queue_raced); 384 rds_stats_inc(s_send_lock_queue_raced);
349 ret = -EAGAIN; 385 goto restart;
350 } 386 }
351 spin_unlock_irqrestore(&conn->c_lock, flags);
352 } 387 }
353out: 388out:
354 return ret; 389 return ret;
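
The hunk above replaces the old c_send_lock serialization with the RDS_IN_XMIT flag bit: a sender that fails to acquire the bit backs off, so the thread that holds it must re-check the send queue after releasing it, or a message queued in that window would be stranded. A minimal user-space analog of that handoff, using C11 atomics (struct conn, the counter, and the function names here are illustrative, not the kernel's):

#include <stdatomic.h>
#include <stdbool.h>

struct conn {
	atomic_bool in_xmit;	/* stands in for the RDS_IN_XMIT bit */
	_Atomic int queued;	/* stands in for conn->c_send_queue */
};

static bool acquire_in_xmit(struct conn *c)
{
	/* like test_and_set_bit(): true only if we set it first */
	return !atomic_exchange(&c->in_xmit, true);
}

static void release_in_xmit(struct conn *c)
{
	atomic_store(&c->in_xmit, false);
}

static void send_xmit(struct conn *c)
{
	if (!acquire_in_xmit(c))
		return;		/* another thread is draining the queue */
restart:
	while (atomic_load(&c->queued) > 0)
		atomic_fetch_sub(&c->queued, 1);	/* "transmit" one */

	release_in_xmit(c);

	/*
	 * A sender can enqueue between our last queue test and the
	 * store above, see in_xmit still set, and back off; re-check
	 * so its message is not stranded (the s_send_lock_queue_raced
	 * case in the patch).
	 */
	if (atomic_load(&c->queued) > 0 && acquire_in_xmit(c))
		goto restart;
}

The sequentially consistent atomics play the role of the patch's smp_mb() here: the store that clears the flag must be ordered before the re-read of the queue.
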
@@ -376,52 +411,60 @@ static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
376} 411}
377 412
378/* 413/*
379 * Returns true if there are no messages on the send and retransmit queues 414 * This is pretty similar to what happens below in the ACK
380 * which have a sequence number greater than or equal to the given sequence 415 * handling code - except that we call here as soon as we get
381 * number. 416 * the IB send completion on the RDMA op and the accompanying
417 * message.
382 */ 418 */
383int rds_send_acked_before(struct rds_connection *conn, u64 seq) 419void rds_rdma_send_complete(struct rds_message *rm, int status)
384{ 420{
385 struct rds_message *rm, *tmp; 421 struct rds_sock *rs = NULL;
386 int ret = 1; 422 struct rm_rdma_op *ro;
423 struct rds_notifier *notifier;
424 unsigned long flags;
387 425
388 spin_lock(&conn->c_lock); 426 spin_lock_irqsave(&rm->m_rs_lock, flags);
389 427
390 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 428 ro = &rm->rdma;
391 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 429 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
392 ret = 0; 430 ro->op_active && ro->op_notify && ro->op_notifier) {
393 break; 431 notifier = ro->op_notifier;
394 } 432 rs = rm->m_rs;
433 sock_hold(rds_rs_to_sk(rs));
395 434
396 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 435 notifier->n_status = status;
397 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 436 spin_lock(&rs->rs_lock);
398 ret = 0; 437 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
399 break; 438 spin_unlock(&rs->rs_lock);
439
440 ro->op_notifier = NULL;
400 } 441 }
401 442
402 spin_unlock(&conn->c_lock); 443 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
403 444
404 return ret; 445 if (rs) {
446 rds_wake_sk_sleep(rs);
447 sock_put(rds_rs_to_sk(rs));
448 }
405} 449}
450EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
406 451
407/* 452/*
408 * This is pretty similar to what happens below in the ACK 453 * Just like above, except looks at atomic op
409 * handling code - except that we call here as soon as we get
410 * the IB send completion on the RDMA op and the accompanying
411 * message.
412 */ 454 */
413void rds_rdma_send_complete(struct rds_message *rm, int status) 455void rds_atomic_send_complete(struct rds_message *rm, int status)
414{ 456{
415 struct rds_sock *rs = NULL; 457 struct rds_sock *rs = NULL;
416 struct rds_rdma_op *ro; 458 struct rm_atomic_op *ao;
417 struct rds_notifier *notifier; 459 struct rds_notifier *notifier;
460 unsigned long flags;
418 461
419 spin_lock(&rm->m_rs_lock); 462 spin_lock_irqsave(&rm->m_rs_lock, flags);
420 463
421 ro = rm->m_rdma_op; 464 ao = &rm->atomic;
422 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && 465 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
423 ro && ro->r_notify && ro->r_notifier) { 466 && ao->op_active && ao->op_notify && ao->op_notifier) {
424 notifier = ro->r_notifier; 467 notifier = ao->op_notifier;
425 rs = rm->m_rs; 468 rs = rm->m_rs;
426 sock_hold(rds_rs_to_sk(rs)); 469 sock_hold(rds_rs_to_sk(rs));
427 470
@@ -430,17 +473,17 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
430 list_add_tail(&notifier->n_list, &rs->rs_notify_queue); 473 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
431 spin_unlock(&rs->rs_lock); 474 spin_unlock(&rs->rs_lock);
432 475
433 ro->r_notifier = NULL; 476 ao->op_notifier = NULL;
434 } 477 }
435 478
436 spin_unlock(&rm->m_rs_lock); 479 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
437 480
438 if (rs) { 481 if (rs) {
439 rds_wake_sk_sleep(rs); 482 rds_wake_sk_sleep(rs);
440 sock_put(rds_rs_to_sk(rs)); 483 sock_put(rds_rs_to_sk(rs));
441 } 484 }
442} 485}
443EXPORT_SYMBOL_GPL(rds_rdma_send_complete); 486EXPORT_SYMBOL_GPL(rds_atomic_send_complete);
444 487
445/* 488/*
446 * This is the same as rds_rdma_send_complete except we 489 * This is the same as rds_rdma_send_complete except we
@@ -448,15 +491,23 @@ EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
448 * socket, socket lock) and can just move the notifier. 491 * socket, socket lock) and can just move the notifier.
449 */ 492 */
450static inline void 493static inline void
451__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) 494__rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
452{ 495{
453 struct rds_rdma_op *ro; 496 struct rm_rdma_op *ro;
497 struct rm_atomic_op *ao;
498
499 ro = &rm->rdma;
500 if (ro->op_active && ro->op_notify && ro->op_notifier) {
501 ro->op_notifier->n_status = status;
502 list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue);
503 ro->op_notifier = NULL;
504 }
454 505
455 ro = rm->m_rdma_op; 506 ao = &rm->atomic;
456 if (ro && ro->r_notify && ro->r_notifier) { 507 if (ao->op_active && ao->op_notify && ao->op_notifier) {
457 ro->r_notifier->n_status = status; 508 ao->op_notifier->n_status = status;
458 list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); 509 list_add_tail(&ao->op_notifier->n_list, &rs->rs_notify_queue);
459 ro->r_notifier = NULL; 510 ao->op_notifier = NULL;
460 } 511 }
461 512
462 /* No need to wake the app - caller does this */ 513 /* No need to wake the app - caller does this */
@@ -468,7 +519,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
468 * So speed is not an issue here. 519 * So speed is not an issue here.
469 */ 520 */
470struct rds_message *rds_send_get_message(struct rds_connection *conn, 521struct rds_message *rds_send_get_message(struct rds_connection *conn,
471 struct rds_rdma_op *op) 522 struct rm_rdma_op *op)
472{ 523{
473 struct rds_message *rm, *tmp, *found = NULL; 524 struct rds_message *rm, *tmp, *found = NULL;
474 unsigned long flags; 525 unsigned long flags;
@@ -476,7 +527,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
476 spin_lock_irqsave(&conn->c_lock, flags); 527 spin_lock_irqsave(&conn->c_lock, flags);
477 528
478 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 529 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
479 if (rm->m_rdma_op == op) { 530 if (&rm->rdma == op) {
480 atomic_inc(&rm->m_refcount); 531 atomic_inc(&rm->m_refcount);
481 found = rm; 532 found = rm;
482 goto out; 533 goto out;
@@ -484,7 +535,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
484 } 535 }
485 536
486 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 537 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
487 if (rm->m_rdma_op == op) { 538 if (&rm->rdma == op) {
488 atomic_inc(&rm->m_refcount); 539 atomic_inc(&rm->m_refcount);
489 found = rm; 540 found = rm;
490 break; 541 break;
@@ -506,7 +557,7 @@ EXPORT_SYMBOL_GPL(rds_send_get_message);
506 * removing the messages from the 'messages' list regardless of if it found 557 * removing the messages from the 'messages' list regardless of if it found
507 * the messages on the socket list or not. 558 * the messages on the socket list or not.
508 */ 559 */
509void rds_send_remove_from_sock(struct list_head *messages, int status) 560static void rds_send_remove_from_sock(struct list_head *messages, int status)
510{ 561{
511 unsigned long flags; 562 unsigned long flags;
512 struct rds_sock *rs = NULL; 563 struct rds_sock *rs = NULL;
@@ -544,19 +595,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
544 spin_lock(&rs->rs_lock); 595 spin_lock(&rs->rs_lock);
545 596
546 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { 597 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
547 struct rds_rdma_op *ro = rm->m_rdma_op; 598 struct rm_rdma_op *ro = &rm->rdma;
548 struct rds_notifier *notifier; 599 struct rds_notifier *notifier;
549 600
550 list_del_init(&rm->m_sock_item); 601 list_del_init(&rm->m_sock_item);
551 rds_send_sndbuf_remove(rs, rm); 602 rds_send_sndbuf_remove(rs, rm);
552 603
553 if (ro && ro->r_notifier && (status || ro->r_notify)) { 604 if (ro->op_active && ro->op_notifier &&
554 notifier = ro->r_notifier; 605 (ro->op_notify || (ro->op_recverr && status))) {
606 notifier = ro->op_notifier;
555 list_add_tail(&notifier->n_list, 607 list_add_tail(&notifier->n_list,
556 &rs->rs_notify_queue); 608 &rs->rs_notify_queue);
557 if (!notifier->n_status) 609 if (!notifier->n_status)
558 notifier->n_status = status; 610 notifier->n_status = status;
559 rm->m_rdma_op->r_notifier = NULL; 611 rm->rdma.op_notifier = NULL;
560 } 612 }
561 was_on_sock = 1; 613 was_on_sock = 1;
562 rm->m_rs = NULL; 614 rm->m_rs = NULL;
@@ -619,9 +671,8 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
619{ 671{
620 struct rds_message *rm, *tmp; 672 struct rds_message *rm, *tmp;
621 struct rds_connection *conn; 673 struct rds_connection *conn;
622 unsigned long flags, flags2; 674 unsigned long flags;
623 LIST_HEAD(list); 675 LIST_HEAD(list);
624 int wake = 0;
625 676
626 /* get all the messages we're dropping under the rs lock */ 677 /* get all the messages we're dropping under the rs lock */
627 spin_lock_irqsave(&rs->rs_lock, flags); 678 spin_lock_irqsave(&rs->rs_lock, flags);
@@ -631,59 +682,54 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
631 dest->sin_port != rm->m_inc.i_hdr.h_dport)) 682 dest->sin_port != rm->m_inc.i_hdr.h_dport))
632 continue; 683 continue;
633 684
634 wake = 1;
635 list_move(&rm->m_sock_item, &list); 685 list_move(&rm->m_sock_item, &list);
636 rds_send_sndbuf_remove(rs, rm); 686 rds_send_sndbuf_remove(rs, rm);
637 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); 687 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
638 } 688 }
639 689
640 /* order flag updates with the rs lock */ 690 /* order flag updates with the rs lock */
641 if (wake) 691 smp_mb__after_clear_bit();
642 smp_mb__after_clear_bit();
643 692
644 spin_unlock_irqrestore(&rs->rs_lock, flags); 693 spin_unlock_irqrestore(&rs->rs_lock, flags);
645 694
646 conn = NULL; 695 if (list_empty(&list))
696 return;
647 697
648 /* now remove the messages from the conn list as needed */ 698 /* Remove the messages from the conn */
649 list_for_each_entry(rm, &list, m_sock_item) { 699 list_for_each_entry(rm, &list, m_sock_item) {
650 /* We do this here rather than in the loop above, so that
651 * we don't have to nest m_rs_lock under rs->rs_lock */
652 spin_lock_irqsave(&rm->m_rs_lock, flags2);
653 /* If this is a RDMA operation, notify the app. */
654 spin_lock(&rs->rs_lock);
655 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
656 spin_unlock(&rs->rs_lock);
657 rm->m_rs = NULL;
658 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
659 700
701 conn = rm->m_inc.i_conn;
702
703 spin_lock_irqsave(&conn->c_lock, flags);
660 /* 704 /*
661 * If we see this flag cleared then we're *sure* that someone 705 * Maybe someone else beat us to removing rm from the conn.
662 * else beat us to removing it from the conn. If we race 706 * If we race with their flag update we'll get the lock and
663 * with their flag update we'll get the lock and then really 707 * then really see that the flag has been cleared.
664 * see that the flag has been cleared.
665 */ 708 */
666 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 709 if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
710 spin_unlock_irqrestore(&conn->c_lock, flags);
667 continue; 711 continue;
668
669 if (conn != rm->m_inc.i_conn) {
670 if (conn)
671 spin_unlock_irqrestore(&conn->c_lock, flags);
672 conn = rm->m_inc.i_conn;
673 spin_lock_irqsave(&conn->c_lock, flags);
674 } 712 }
713 list_del_init(&rm->m_conn_item);
714 spin_unlock_irqrestore(&conn->c_lock, flags);
675 715
676 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { 716 /*
677 list_del_init(&rm->m_conn_item); 717 * Couldn't grab m_rs_lock in top loop (lock ordering),
678 rds_message_put(rm); 718 * but we can now.
679 } 719 */
680 } 720 spin_lock_irqsave(&rm->m_rs_lock, flags);
681 721
682 if (conn) 722 spin_lock(&rs->rs_lock);
683 spin_unlock_irqrestore(&conn->c_lock, flags); 723 __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
724 spin_unlock(&rs->rs_lock);
684 725
685 if (wake) 726 rm->m_rs = NULL;
686 rds_wake_sk_sleep(rs); 727 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
728
729 rds_message_put(rm);
730 }
731
732 rds_wake_sk_sleep(rs);
687 733
688 while (!list_empty(&list)) { 734 while (!list_empty(&list)) {
689 rm = list_entry(list.next, struct rds_message, m_sock_item); 735 rm = list_entry(list.next, struct rds_message, m_sock_item);
@@ -763,6 +809,63 @@ out:
763 return *queued; 809 return *queued;
764} 810}
765 811
812/*
813 * rds_message is getting to be quite complicated, and we'd like to allocate
814 * it all in one go. This figures out how big it needs to be up front.
815 */
816static int rds_rm_size(struct msghdr *msg, int data_len)
817{
818 struct cmsghdr *cmsg;
819 int size = 0;
820 int cmsg_groups = 0;
821 int retval;
822
823 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
824 if (!CMSG_OK(msg, cmsg))
825 return -EINVAL;
826
827 if (cmsg->cmsg_level != SOL_RDS)
828 continue;
829
830 switch (cmsg->cmsg_type) {
831 case RDS_CMSG_RDMA_ARGS:
832 cmsg_groups |= 1;
833 retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
834 if (retval < 0)
835 return retval;
836 size += retval;
837
838 break;
839
840 case RDS_CMSG_RDMA_DEST:
841 case RDS_CMSG_RDMA_MAP:
842 cmsg_groups |= 2;
843	/* these are valid but do not add any size */
844 break;
845
846 case RDS_CMSG_ATOMIC_CSWP:
847 case RDS_CMSG_ATOMIC_FADD:
848 case RDS_CMSG_MASKED_ATOMIC_CSWP:
849 case RDS_CMSG_MASKED_ATOMIC_FADD:
850 cmsg_groups |= 1;
851 size += sizeof(struct scatterlist);
852 break;
853
854 default:
855 return -EINVAL;
856 }
857
858 }
859
860 size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
861
862 /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
863 if (cmsg_groups == 3)
864 return -EINVAL;
865
866 return size;
867}
868
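
rds_rm_size() walks the control messages once, collecting RDMA_ARGS and the atomic requests into bit 1 of cmsg_groups and RDMA_DEST/RDMA_MAP into bit 2, so a final value of 3 means the two mutually exclusive groups were mixed in one message. The payload then costs one scatterlist entry per page; the ceil() used above is RDS's round-up-division helper. A minimal equivalent, under an illustrative name so it does not clash with the real macro:

/* Round-up division, equivalent to the ceil() helper used above
 * (the kernel also provides DIV_ROUND_UP for the same thing). */
#define rds_ceil(x, y)	(((x) + (y) - 1) / (y))

/* e.g. a 9000-byte payload with 4096-byte pages reserves
 * rds_ceil(9000, 4096) == 3 struct scatterlist entries. */
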
766static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, 869static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
767 struct msghdr *msg, int *allocated_mr) 870 struct msghdr *msg, int *allocated_mr)
768{ 871{
@@ -777,7 +880,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
777 continue; 880 continue;
778 881
779 /* As a side effect, RDMA_DEST and RDMA_MAP will set 882 /* As a side effect, RDMA_DEST and RDMA_MAP will set
780 * rm->m_rdma_cookie and rm->m_rdma_mr. 883 * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr.
781 */ 884 */
782 switch (cmsg->cmsg_type) { 885 switch (cmsg->cmsg_type) {
783 case RDS_CMSG_RDMA_ARGS: 886 case RDS_CMSG_RDMA_ARGS:
@@ -793,6 +896,12 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
793 if (!ret) 896 if (!ret)
794 *allocated_mr = 1; 897 *allocated_mr = 1;
795 break; 898 break;
899 case RDS_CMSG_ATOMIC_CSWP:
900 case RDS_CMSG_ATOMIC_FADD:
901 case RDS_CMSG_MASKED_ATOMIC_CSWP:
902 case RDS_CMSG_MASKED_ATOMIC_FADD:
903 ret = rds_cmsg_atomic(rs, rm, cmsg);
904 break;
796 905
797 default: 906 default:
798 return -EINVAL; 907 return -EINVAL;
@@ -850,13 +959,30 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
850 goto out; 959 goto out;
851 } 960 }
852 961
853 rm = rds_message_copy_from_user(msg->msg_iov, payload_len); 962 /* size of rm including all sgs */
854 if (IS_ERR(rm)) { 963 ret = rds_rm_size(msg, payload_len);
855 ret = PTR_ERR(rm); 964 if (ret < 0)
856 rm = NULL; 965 goto out;
966
967 rm = rds_message_alloc(ret, GFP_KERNEL);
968 if (!rm) {
969 ret = -ENOMEM;
857 goto out; 970 goto out;
858 } 971 }
859 972
973 /* Attach data to the rm */
974 if (payload_len) {
975 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
976 if (!rm->data.op_sg) {
977 ret = -ENOMEM;
978 goto out;
979 }
980 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
981 if (ret)
982 goto out;
983 }
984 rm->data.op_active = 1;
985
860 rm->m_daddr = daddr; 986 rm->m_daddr = daddr;
861 987
862 /* rds_conn_create has a spinlock that runs with IRQ off. 988 /* rds_conn_create has a spinlock that runs with IRQ off.
@@ -879,22 +1005,23 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
879 if (ret) 1005 if (ret)
880 goto out; 1006 goto out;
881 1007
882 if ((rm->m_rdma_cookie || rm->m_rdma_op) && 1008 if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
883 conn->c_trans->xmit_rdma == NULL) {
884 if (printk_ratelimit()) 1009 if (printk_ratelimit())
885 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", 1010 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
886 rm->m_rdma_op, conn->c_trans->xmit_rdma); 1011 &rm->rdma, conn->c_trans->xmit_rdma);
887 ret = -EOPNOTSUPP; 1012 ret = -EOPNOTSUPP;
888 goto out; 1013 goto out;
889 } 1014 }
890 1015
891 /* If the connection is down, trigger a connect. We may 1016 if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
892 * have scheduled a delayed reconnect however - in this case 1017 if (printk_ratelimit())
893 * we should not interfere. 1018 printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
894 */ 1019 &rm->atomic, conn->c_trans->xmit_atomic);
895 if (rds_conn_state(conn) == RDS_CONN_DOWN && 1020 ret = -EOPNOTSUPP;
896 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) 1021 goto out;
897 queue_delayed_work(rds_wq, &conn->c_conn_w, 0); 1022 }
1023
1024 rds_conn_connect_if_down(conn);
898 1025
899 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 1026 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
900 if (ret) { 1027 if (ret) {
@@ -938,7 +1065,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
938 rds_stats_inc(s_send_queued); 1065 rds_stats_inc(s_send_queued);
939 1066
940 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) 1067 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
941 rds_send_worker(&conn->c_send_w.work); 1068 rds_send_xmit(conn);
942 1069
943 rds_message_put(rm); 1070 rds_message_put(rm);
944 return payload_len; 1071 return payload_len;
@@ -966,20 +1093,15 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
966 int ret = 0; 1093 int ret = 0;
967 1094
968 rm = rds_message_alloc(0, GFP_ATOMIC); 1095 rm = rds_message_alloc(0, GFP_ATOMIC);
969 if (rm == NULL) { 1096 if (!rm) {
970 ret = -ENOMEM; 1097 ret = -ENOMEM;
971 goto out; 1098 goto out;
972 } 1099 }
973 1100
974 rm->m_daddr = conn->c_faddr; 1101 rm->m_daddr = conn->c_faddr;
1102 rm->data.op_active = 1;
975 1103
976 /* If the connection is down, trigger a connect. We may 1104 rds_conn_connect_if_down(conn);
977 * have scheduled a delayed reconnect however - in this case
978 * we should not interfere.
979 */
980 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
981 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
982 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
983 1105
984 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); 1106 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
985 if (ret) 1107 if (ret)
@@ -999,7 +1121,9 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
999 rds_stats_inc(s_send_queued); 1121 rds_stats_inc(s_send_queued);
1000 rds_stats_inc(s_send_pong); 1122 rds_stats_inc(s_send_pong);
1001 1123
1002 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 1124 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
1125 rds_send_xmit(conn);
1126
1003 rds_message_put(rm); 1127 rds_message_put(rm);
1004 return 0; 1128 return 0;
1005 1129
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7598eb07cfb1..10c759ccac0c 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -57,8 +57,8 @@ static const char *const rds_stat_names[] = {
57 "recv_ping", 57 "recv_ping",
58 "send_queue_empty", 58 "send_queue_empty",
59 "send_queue_full", 59 "send_queue_full",
60 "send_sem_contention", 60 "send_lock_contention",
61 "send_sem_queue_raced", 61 "send_lock_queue_raced",
62 "send_immediate_retry", 62 "send_immediate_retry",
63 "send_delayed_retry", 63 "send_delayed_retry",
64 "send_drop_acked", 64 "send_drop_acked",
@@ -143,7 +143,7 @@ void rds_stats_exit(void)
143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); 143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info);
144} 144}
145 145
146int __init rds_stats_init(void) 146int rds_stats_init(void)
147{ 147{
148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); 148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info);
149 return 0; 149 return 0;
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 7829a20325d3..25ad0c77a26c 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -105,13 +105,13 @@ void rds_sysctl_exit(void)
105 unregister_sysctl_table(rds_sysctl_reg_table); 105 unregister_sysctl_table(rds_sysctl_reg_table);
106} 106}
107 107
108int __init rds_sysctl_init(void) 108int rds_sysctl_init(void)
109{ 109{
110 rds_sysctl_reconnect_min = msecs_to_jiffies(1); 110 rds_sysctl_reconnect_min = msecs_to_jiffies(1);
111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; 111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
112 112
113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); 113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
114 if (rds_sysctl_reg_table == NULL) 114 if (!rds_sysctl_reg_table)
115 return -ENOMEM; 115 return -ENOMEM;
116 return 0; 116 return 0;
117} 117}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index babf4577ff7d..8e0a32001c90 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -41,7 +41,7 @@
41/* only for info exporting */ 41/* only for info exporting */
42static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); 42static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
43static LIST_HEAD(rds_tcp_tc_list); 43static LIST_HEAD(rds_tcp_tc_list);
44unsigned int rds_tcp_tc_count; 44static unsigned int rds_tcp_tc_count;
45 45
46/* Track rds_tcp_connection structs so they can be cleaned up */ 46/* Track rds_tcp_connection structs so they can be cleaned up */
47static DEFINE_SPINLOCK(rds_tcp_conn_lock); 47static DEFINE_SPINLOCK(rds_tcp_conn_lock);
@@ -200,7 +200,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
200 struct rds_tcp_connection *tc; 200 struct rds_tcp_connection *tc;
201 201
202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); 202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
203 if (tc == NULL) 203 if (!tc)
204 return -ENOMEM; 204 return -ENOMEM;
205 205
206 tc->t_sock = NULL; 206 tc->t_sock = NULL;
@@ -221,7 +221,13 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
221static void rds_tcp_conn_free(void *arg) 221static void rds_tcp_conn_free(void *arg)
222{ 222{
223 struct rds_tcp_connection *tc = arg; 223 struct rds_tcp_connection *tc = arg;
224 unsigned long flags;
224 rdsdebug("freeing tc %p\n", tc); 225 rdsdebug("freeing tc %p\n", tc);
226
227 spin_lock_irqsave(&rds_tcp_conn_lock, flags);
228 list_del(&tc->t_tcp_node);
229 spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
230
225 kmem_cache_free(rds_tcp_conn_slab, tc); 231 kmem_cache_free(rds_tcp_conn_slab, tc);
226} 232}
227 233
@@ -243,7 +249,7 @@ static void rds_tcp_destroy_conns(void)
243 } 249 }
244} 250}
245 251
246void rds_tcp_exit(void) 252static void rds_tcp_exit(void)
247{ 253{
248 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 254 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
249 rds_tcp_listen_stop(); 255 rds_tcp_listen_stop();
@@ -258,7 +264,6 @@ struct rds_transport rds_tcp_transport = {
258 .laddr_check = rds_tcp_laddr_check, 264 .laddr_check = rds_tcp_laddr_check,
259 .xmit_prepare = rds_tcp_xmit_prepare, 265 .xmit_prepare = rds_tcp_xmit_prepare,
260 .xmit_complete = rds_tcp_xmit_complete, 266 .xmit_complete = rds_tcp_xmit_complete,
261 .xmit_cong_map = rds_tcp_xmit_cong_map,
262 .xmit = rds_tcp_xmit, 267 .xmit = rds_tcp_xmit,
263 .recv = rds_tcp_recv, 268 .recv = rds_tcp_recv,
264 .conn_alloc = rds_tcp_conn_alloc, 269 .conn_alloc = rds_tcp_conn_alloc,
@@ -266,7 +271,6 @@ struct rds_transport rds_tcp_transport = {
266 .conn_connect = rds_tcp_conn_connect, 271 .conn_connect = rds_tcp_conn_connect,
267 .conn_shutdown = rds_tcp_conn_shutdown, 272 .conn_shutdown = rds_tcp_conn_shutdown,
268 .inc_copy_to_user = rds_tcp_inc_copy_to_user, 273 .inc_copy_to_user = rds_tcp_inc_copy_to_user,
269 .inc_purge = rds_tcp_inc_purge,
270 .inc_free = rds_tcp_inc_free, 274 .inc_free = rds_tcp_inc_free,
271 .stats_info_copy = rds_tcp_stats_info_copy, 275 .stats_info_copy = rds_tcp_stats_info_copy,
272 .exit = rds_tcp_exit, 276 .exit = rds_tcp_exit,
@@ -276,14 +280,14 @@ struct rds_transport rds_tcp_transport = {
276 .t_prefer_loopback = 1, 280 .t_prefer_loopback = 1,
277}; 281};
278 282
279int __init rds_tcp_init(void) 283static int rds_tcp_init(void)
280{ 284{
281 int ret; 285 int ret;
282 286
283 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", 287 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
284 sizeof(struct rds_tcp_connection), 288 sizeof(struct rds_tcp_connection),
285 0, 0, NULL); 289 0, 0, NULL);
286 if (rds_tcp_conn_slab == NULL) { 290 if (!rds_tcp_conn_slab) {
287 ret = -ENOMEM; 291 ret = -ENOMEM;
288 goto out; 292 goto out;
289 } 293 }
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 844fa6b9cf5a..9cf2927d0021 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -43,8 +43,6 @@ struct rds_tcp_statistics {
43}; 43};
44 44
45/* tcp.c */ 45/* tcp.c */
46int __init rds_tcp_init(void);
47void rds_tcp_exit(void);
48void rds_tcp_tune(struct socket *sock); 46void rds_tcp_tune(struct socket *sock);
49void rds_tcp_nonagle(struct socket *sock); 47void rds_tcp_nonagle(struct socket *sock);
50void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); 48void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn);
@@ -61,16 +59,15 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn);
61void rds_tcp_state_change(struct sock *sk); 59void rds_tcp_state_change(struct sock *sk);
62 60
63/* tcp_listen.c */ 61/* tcp_listen.c */
64int __init rds_tcp_listen_init(void); 62int rds_tcp_listen_init(void);
65void rds_tcp_listen_stop(void); 63void rds_tcp_listen_stop(void);
66void rds_tcp_listen_data_ready(struct sock *sk, int bytes); 64void rds_tcp_listen_data_ready(struct sock *sk, int bytes);
67 65
68/* tcp_recv.c */ 66/* tcp_recv.c */
69int __init rds_tcp_recv_init(void); 67int rds_tcp_recv_init(void);
70void rds_tcp_recv_exit(void); 68void rds_tcp_recv_exit(void);
71void rds_tcp_data_ready(struct sock *sk, int bytes); 69void rds_tcp_data_ready(struct sock *sk, int bytes);
72int rds_tcp_recv(struct rds_connection *conn); 70int rds_tcp_recv(struct rds_connection *conn);
73void rds_tcp_inc_purge(struct rds_incoming *inc);
74void rds_tcp_inc_free(struct rds_incoming *inc); 71void rds_tcp_inc_free(struct rds_incoming *inc);
75int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 72int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
76 size_t size); 73 size_t size);
@@ -81,8 +78,6 @@ void rds_tcp_xmit_complete(struct rds_connection *conn);
81int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 78int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
82 unsigned int hdr_off, unsigned int sg, unsigned int off); 79 unsigned int hdr_off, unsigned int sg, unsigned int off);
83void rds_tcp_write_space(struct sock *sk); 80void rds_tcp_write_space(struct sock *sk);
84int rds_tcp_xmit_cong_map(struct rds_connection *conn,
85 struct rds_cong_map *map, unsigned long offset);
86 81
87/* tcp_stats.c */ 82/* tcp_stats.c */
88DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats); 83DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index c519939e8da9..af95c8e058fc 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -45,7 +45,7 @@ void rds_tcp_state_change(struct sock *sk)
45 45
46 read_lock_bh(&sk->sk_callback_lock); 46 read_lock_bh(&sk->sk_callback_lock);
47 conn = sk->sk_user_data; 47 conn = sk->sk_user_data;
48 if (conn == NULL) { 48 if (!conn) {
49 state_change = sk->sk_state_change; 49 state_change = sk->sk_state_change;
50 goto out; 50 goto out;
51 } 51 }
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 27844f231d10..8b5cc4aa8868 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -116,7 +116,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
116 116
117 read_lock_bh(&sk->sk_callback_lock); 117 read_lock_bh(&sk->sk_callback_lock);
118 ready = sk->sk_user_data; 118 ready = sk->sk_user_data;
119 if (ready == NULL) { /* check for teardown race */ 119 if (!ready) { /* check for teardown race */
120 ready = sk->sk_data_ready; 120 ready = sk->sk_data_ready;
121 goto out; 121 goto out;
122 } 122 }
@@ -135,7 +135,7 @@ out:
135 ready(sk, bytes); 135 ready(sk, bytes);
136} 136}
137 137
138int __init rds_tcp_listen_init(void) 138int rds_tcp_listen_init(void)
139{ 139{
140 struct sockaddr_in sin; 140 struct sockaddr_in sin;
141 struct socket *sock = NULL; 141 struct socket *sock = NULL;
@@ -178,7 +178,7 @@ void rds_tcp_listen_stop(void)
178 struct socket *sock = rds_tcp_listen_sock; 178 struct socket *sock = rds_tcp_listen_sock;
179 struct sock *sk; 179 struct sock *sk;
180 180
181 if (sock == NULL) 181 if (!sock)
182 return; 182 return;
183 183
184 sk = sock->sk; 184 sk = sock->sk;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e43797404102..78205e25500a 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -39,7 +39,7 @@
39 39
40static struct kmem_cache *rds_tcp_incoming_slab; 40static struct kmem_cache *rds_tcp_incoming_slab;
41 41
42void rds_tcp_inc_purge(struct rds_incoming *inc) 42static void rds_tcp_inc_purge(struct rds_incoming *inc)
43{ 43{
44 struct rds_tcp_incoming *tinc; 44 struct rds_tcp_incoming *tinc;
45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); 45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
@@ -190,10 +190,10 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
190 * processing. 190 * processing.
191 */ 191 */
192 while (left) { 192 while (left) {
193 if (tinc == NULL) { 193 if (!tinc) {
194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab, 194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
195 arg->gfp); 195 arg->gfp);
196 if (tinc == NULL) { 196 if (!tinc) {
197 desc->error = -ENOMEM; 197 desc->error = -ENOMEM;
198 goto out; 198 goto out;
199 } 199 }
@@ -229,7 +229,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
229 229
230 if (left && tc->t_tinc_data_rem) { 230 if (left && tc->t_tinc_data_rem) {
231 clone = skb_clone(skb, arg->gfp); 231 clone = skb_clone(skb, arg->gfp);
232 if (clone == NULL) { 232 if (!clone) {
233 desc->error = -ENOMEM; 233 desc->error = -ENOMEM;
234 goto out; 234 goto out;
235 } 235 }
@@ -272,7 +272,8 @@ out:
272} 272}
273 273
274/* the caller has to hold the sock lock */ 274/* the caller has to hold the sock lock */
275int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp, enum km_type km) 275static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp,
276 enum km_type km)
276{ 277{
277 struct rds_tcp_connection *tc = conn->c_transport_data; 278 struct rds_tcp_connection *tc = conn->c_transport_data;
278 struct socket *sock = tc->t_sock; 279 struct socket *sock = tc->t_sock;
@@ -326,7 +327,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
326 327
327 read_lock_bh(&sk->sk_callback_lock); 328 read_lock_bh(&sk->sk_callback_lock);
328 conn = sk->sk_user_data; 329 conn = sk->sk_user_data;
329 if (conn == NULL) { /* check for teardown race */ 330 if (!conn) { /* check for teardown race */
330 ready = sk->sk_data_ready; 331 ready = sk->sk_data_ready;
331 goto out; 332 goto out;
332 } 333 }
@@ -342,12 +343,12 @@ out:
342 ready(sk, bytes); 343 ready(sk, bytes);
343} 344}
344 345
345int __init rds_tcp_recv_init(void) 346int rds_tcp_recv_init(void)
346{ 347{
347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", 348 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming",
348 sizeof(struct rds_tcp_incoming), 349 sizeof(struct rds_tcp_incoming),
349 0, 0, NULL); 350 0, 0, NULL);
350 if (rds_tcp_incoming_slab == NULL) 351 if (!rds_tcp_incoming_slab)
351 return -ENOMEM; 352 return -ENOMEM;
352 return 0; 353 return 0;
353} 354}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 2f012a07d94d..1b4fd68f0c7c 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -63,7 +63,7 @@ void rds_tcp_xmit_complete(struct rds_connection *conn)
63} 63}
64 64
65/* the core send_sem serializes this with other xmit and shutdown */ 65/* the core send_sem serializes this with other xmit and shutdown */
66int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) 66static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
67{ 67{
68 struct kvec vec = { 68 struct kvec vec = {
69 .iov_base = data, 69 .iov_base = data,
@@ -77,56 +77,6 @@ int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
77} 77}
78 78
79/* the core send_sem serializes this with other xmit and shutdown */ 79/* the core send_sem serializes this with other xmit and shutdown */
80int rds_tcp_xmit_cong_map(struct rds_connection *conn,
81 struct rds_cong_map *map, unsigned long offset)
82{
83 static struct rds_header rds_tcp_map_header = {
84 .h_flags = RDS_FLAG_CONG_BITMAP,
85 };
86 struct rds_tcp_connection *tc = conn->c_transport_data;
87 unsigned long i;
88 int ret;
89 int copied = 0;
90
91 /* Some problem claims cpu_to_be32(constant) isn't a constant. */
92 rds_tcp_map_header.h_len = cpu_to_be32(RDS_CONG_MAP_BYTES);
93
94 if (offset < sizeof(struct rds_header)) {
95 ret = rds_tcp_sendmsg(tc->t_sock,
96 (void *)&rds_tcp_map_header + offset,
97 sizeof(struct rds_header) - offset);
98 if (ret <= 0)
99 return ret;
100 offset += ret;
101 copied = ret;
102 if (offset < sizeof(struct rds_header))
103 return ret;
104 }
105
106 offset -= sizeof(struct rds_header);
107 i = offset / PAGE_SIZE;
108 offset = offset % PAGE_SIZE;
109 BUG_ON(i >= RDS_CONG_MAP_PAGES);
110
111 do {
112 ret = tc->t_sock->ops->sendpage(tc->t_sock,
113 virt_to_page(map->m_page_addrs[i]),
114 offset, PAGE_SIZE - offset,
115 MSG_DONTWAIT);
116 if (ret <= 0)
117 break;
118 copied += ret;
119 offset += ret;
120 if (offset == PAGE_SIZE) {
121 offset = 0;
122 i++;
123 }
124 } while (i < RDS_CONG_MAP_PAGES);
125
126 return copied ? copied : ret;
127}
128
129/* the core send_sem serializes this with other xmit and shutdown */
130int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
131 unsigned int hdr_off, unsigned int sg, unsigned int off) 81 unsigned int hdr_off, unsigned int sg, unsigned int off)
132{ 82{
@@ -166,21 +116,21 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
166 goto out; 116 goto out;
167 } 117 }
168 118
169 while (sg < rm->m_nents) { 119 while (sg < rm->data.op_nents) {
170 ret = tc->t_sock->ops->sendpage(tc->t_sock, 120 ret = tc->t_sock->ops->sendpage(tc->t_sock,
171 sg_page(&rm->m_sg[sg]), 121 sg_page(&rm->data.op_sg[sg]),
172 rm->m_sg[sg].offset + off, 122 rm->data.op_sg[sg].offset + off,
173 rm->m_sg[sg].length - off, 123 rm->data.op_sg[sg].length - off,
174 MSG_DONTWAIT|MSG_NOSIGNAL); 124 MSG_DONTWAIT|MSG_NOSIGNAL);
175 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), 125 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
176 rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, 126 rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
177 ret); 127 ret);
178 if (ret <= 0) 128 if (ret <= 0)
179 break; 129 break;
180 130
181 off += ret; 131 off += ret;
182 done += ret; 132 done += ret;
183 if (off == rm->m_sg[sg].length) { 133 if (off == rm->data.op_sg[sg].length) {
184 off = 0; 134 off = 0;
185 sg++; 135 sg++;
186 } 136 }
@@ -226,7 +176,7 @@ void rds_tcp_write_space(struct sock *sk)
226 176
227 read_lock_bh(&sk->sk_callback_lock); 177 read_lock_bh(&sk->sk_callback_lock);
228 conn = sk->sk_user_data; 178 conn = sk->sk_user_data;
229 if (conn == NULL) { 179 if (!conn) {
230 write_space = sk->sk_write_space; 180 write_space = sk->sk_write_space;
231 goto out; 181 goto out;
232 } 182 }
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 786c20eaaf5e..0fd90f8c5f59 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -61,7 +61,7 @@
61 * 61 *
62 * Transition to state DISCONNECTING/DOWN: 62 * Transition to state DISCONNECTING/DOWN:
63 * - Inside the shutdown worker; synchronizes with xmit path 63 * - Inside the shutdown worker; synchronizes with xmit path
64 * through c_send_lock, and with connection management callbacks 64 * through RDS_IN_XMIT, and with connection management callbacks
65 * via c_cm_lock. 65 * via c_cm_lock.
66 * 66 *
67 * For receive callbacks, we rely on the underlying transport 67 * For receive callbacks, we rely on the underlying transport
@@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
110 * We should *always* start with a random backoff; otherwise a broken connection 110 * We should *always* start with a random backoff; otherwise a broken connection
111 * will always take several iterations to be re-established. 111 * will always take several iterations to be re-established.
112 */ 112 */
113static void rds_queue_reconnect(struct rds_connection *conn) 113void rds_queue_reconnect(struct rds_connection *conn)
114{ 114{
115 unsigned long rand; 115 unsigned long rand;
116 116
@@ -156,58 +156,6 @@ void rds_connect_worker(struct work_struct *work)
156 } 156 }
157} 157}
158 158
159void rds_shutdown_worker(struct work_struct *work)
160{
161 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
162
163 /* shut it down unless it's down already */
164 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
165 /*
166 * Quiesce the connection mgmt handlers before we start tearing
167 * things down. We don't hold the mutex for the entire
168 * duration of the shutdown operation, else we may be
169 * deadlocking with the CM handler. Instead, the CM event
170 * handler is supposed to check for state DISCONNECTING
171 */
172 mutex_lock(&conn->c_cm_lock);
173 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) &&
174 !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
175 rds_conn_error(conn, "shutdown called in state %d\n",
176 atomic_read(&conn->c_state));
177 mutex_unlock(&conn->c_cm_lock);
178 return;
179 }
180 mutex_unlock(&conn->c_cm_lock);
181
182 mutex_lock(&conn->c_send_lock);
183 conn->c_trans->conn_shutdown(conn);
184 rds_conn_reset(conn);
185 mutex_unlock(&conn->c_send_lock);
186
187 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
188	/* This can happen - e.g. when we're in the middle of tearing
189	 * down the connection, and someone unloads the rds module.
190	 * Quite reproducible with loopback connections.
191 * Mostly harmless.
192 */
193 rds_conn_error(conn,
194 "%s: failed to transition to state DOWN, "
195 "current state is %d\n",
196 __func__,
197 atomic_read(&conn->c_state));
198 return;
199 }
200 }
201
202 /* Then reconnect if it's still live.
203 * The passive side of an IB loopback connection is never added
204 * to the conn hash, so we never trigger a reconnect on this
205 * conn - the reconnect is always triggered by the active peer. */
206 cancel_delayed_work(&conn->c_conn_w);
207 if (!hlist_unhashed(&conn->c_hash_node))
208 rds_queue_reconnect(conn);
209}
210
211void rds_send_worker(struct work_struct *work) 159void rds_send_worker(struct work_struct *work)
212{ 160{
213 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); 161 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
@@ -252,15 +200,22 @@ void rds_recv_worker(struct work_struct *work)
252 } 200 }
253} 201}
254 202
203void rds_shutdown_worker(struct work_struct *work)
204{
205 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
206
207 rds_conn_shutdown(conn);
208}
209
255void rds_threads_exit(void) 210void rds_threads_exit(void)
256{ 211{
257 destroy_workqueue(rds_wq); 212 destroy_workqueue(rds_wq);
258} 213}
259 214
260int __init rds_threads_init(void) 215int rds_threads_init(void)
261{ 216{
262 rds_wq = create_workqueue("krdsd"); 217 rds_wq = create_singlethread_workqueue("krdsd");
263 if (rds_wq == NULL) 218 if (!rds_wq)
264 return -ENOMEM; 219 return -ENOMEM;
265 220
266 return 0; 221 return 0;
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7e1067901353..7f2ac4fec367 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -71,19 +71,28 @@ void rds_trans_unregister(struct rds_transport *trans)
71} 71}
72EXPORT_SYMBOL_GPL(rds_trans_unregister); 72EXPORT_SYMBOL_GPL(rds_trans_unregister);
73 73
74void rds_trans_put(struct rds_transport *trans)
75{
76 if (trans && trans->t_owner)
77 module_put(trans->t_owner);
78}
79
74struct rds_transport *rds_trans_get_preferred(__be32 addr) 80struct rds_transport *rds_trans_get_preferred(__be32 addr)
75{ 81{
76 struct rds_transport *ret = NULL; 82 struct rds_transport *ret = NULL;
77 int i; 83 struct rds_transport *trans;
84 unsigned int i;
78 85
79 if (IN_LOOPBACK(ntohl(addr))) 86 if (IN_LOOPBACK(ntohl(addr)))
80 return &rds_loop_transport; 87 return &rds_loop_transport;
81 88
82 down_read(&rds_trans_sem); 89 down_read(&rds_trans_sem);
83 for (i = 0; i < RDS_TRANS_COUNT; i++) 90 for (i = 0; i < RDS_TRANS_COUNT; i++) {
84 { 91 trans = transports[i];
85 if (transports[i] && (transports[i]->laddr_check(addr) == 0)) { 92
86 ret = transports[i]; 93 if (trans && (trans->laddr_check(addr) == 0) &&
94 (!trans->t_owner || try_module_get(trans->t_owner))) {
95 ret = trans;
87 break; 96 break;
88 } 97 }
89 } 98 }
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 000000000000..e6b5190daddd
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,80 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
80#endif
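
The new xlist is a minimal lock-free LIFO: xlist_add() publishes a pre-linked chain (new through tail) at the head with cmpxchg(), xlist_del_head() pops one node, xlist_del_head_fast() does the same without atomics for callers that already exclude concurrency, and xlist_splice() steals the entire list with xchg(). A sketch of typical use with an embedded node (struct frag and the pool are illustrative, not part of this header):

#include <linux/kernel.h>	/* container_of */

struct frag {
	int id;
	struct xlist_head node;
};

static struct xlist_head pool;	/* INIT_XLIST_HEAD(&pool) at init time */

static void frag_free(struct frag *f)
{
	/* a single node is both head and tail of the pushed chain */
	xlist_add(&f->node, &f->node, &pool);
}

static struct frag *frag_alloc(void)
{
	struct xlist_head *n = xlist_del_head(&pool);

	return n ? container_of(n, struct frag, node) : NULL;
}
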
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 51875a0c5d48..04f599089e6d 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1241,6 +1241,7 @@ static const struct file_operations rfkill_fops = {
1241 .unlocked_ioctl = rfkill_fop_ioctl, 1241 .unlocked_ioctl = rfkill_fop_ioctl,
1242 .compat_ioctl = rfkill_fop_ioctl, 1242 .compat_ioctl = rfkill_fop_ioctl,
1243#endif 1243#endif
1244 .llseek = no_llseek,
1244}; 1245};
1245 1246
1246static struct miscdevice rfkill_miscdev = { 1247static struct miscdevice rfkill_miscdev = {
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index 3713d7ecab96..1bca6d49ec96 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -142,7 +142,7 @@ static unsigned long rfkill_last_scheduled;
142static unsigned long rfkill_ratelimit(const unsigned long last) 142static unsigned long rfkill_ratelimit(const unsigned long last)
143{ 143{
144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); 144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY);
145 return (time_after(jiffies, last + delay)) ? 0 : delay; 145 return time_after(jiffies, last + delay) ? 0 : delay;
146} 146}
147 147
148static void rfkill_schedule_ratelimited(void) 148static void rfkill_schedule_ratelimited(void)
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index a750a28e0221..fa5f5641a2c2 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -114,7 +114,7 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
114 if (ax25s) 114 if (ax25s)
115 ax25_cb_put(ax25s); 115 ax25_cb_put(ax25s);
116 116
117 return (neigh->ax25 != NULL); 117 return neigh->ax25 != NULL;
118} 118}
119 119
120/* 120/*
@@ -137,7 +137,7 @@ static int rose_link_up(struct rose_neigh *neigh)
137 if (ax25s) 137 if (ax25s)
138 ax25_cb_put(ax25s); 138 ax25_cb_put(ax25s);
139 139
140 return (neigh->ax25 != NULL); 140 return neigh->ax25 != NULL;
141} 141}
142 142
143/* 143/*
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..a36270a994d7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT
518 To compile this code as a module, choose M here: the 518 To compile this code as a module, choose M here: the
519 module will be called act_skbedit. 519 module will be called act_skbedit.
520 520
521config NET_ACT_CSUM
522 tristate "Checksum Updating"
523 depends on NET_CLS_ACT && INET
524 ---help---
525	  Say Y here to update common checksums after direct
526	  packet alterations.
527
528 To compile this code as a module, choose M here: the
529 module will be called act_csum.
530
521config NET_CLS_IND 531config NET_CLS_IND
522 bool "Incoming device classification" 532 bool "Incoming device classification"
523 depends on NET_CLS_U32 || NET_CLS_FW 533 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..960f5dba6304 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
18obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 19obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
19obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 20obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
20obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 21obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..67dc7ce9b63a
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,595 @@
1/*
2 * Checksum updating actions
3 *
4 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18
19#include <linux/netlink.h>
20#include <net/netlink.h>
21#include <linux/rtnetlink.h>
22
23#include <linux/skbuff.h>
24
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <net/icmp.h>
28#include <linux/icmpv6.h>
29#include <linux/igmp.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/ip6_checksum.h>
33
34#include <net/act_api.h>
35
36#include <linux/tc_act/tc_csum.h>
37#include <net/tc_act/tc_csum.h>
38
39#define CSUM_TAB_MASK 15
40static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
41static u32 csum_idx_gen;
42static DEFINE_RWLOCK(csum_lock);
43
44static struct tcf_hashinfo csum_hash_info = {
45 .htab = tcf_csum_ht,
46 .hmask = CSUM_TAB_MASK,
47 .lock = &csum_lock,
48};
49
50static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
51 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
52};
53
54static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
55 struct tc_action *a, int ovr, int bind)
56{
57 struct nlattr *tb[TCA_CSUM_MAX + 1];
58 struct tc_csum *parm;
59 struct tcf_common *pc;
60 struct tcf_csum *p;
61 int ret = 0, err;
62
63 if (nla == NULL)
64 return -EINVAL;
65
66	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0)
68 return err;
69
70 if (tb[TCA_CSUM_PARMS] == NULL)
71 return -EINVAL;
72 parm = nla_data(tb[TCA_CSUM_PARMS]);
73
74 pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
75 if (!pc) {
76 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
77 &csum_idx_gen, &csum_hash_info);
78 if (IS_ERR(pc))
79 return PTR_ERR(pc);
80 p = to_tcf_csum(pc);
81 ret = ACT_P_CREATED;
82 } else {
83 p = to_tcf_csum(pc);
84 if (!ovr) {
85 tcf_hash_release(pc, bind, &csum_hash_info);
86 return -EEXIST;
87 }
88 }
89
90 spin_lock_bh(&p->tcf_lock);
91 p->tcf_action = parm->action;
92 p->update_flags = parm->update_flags;
93 spin_unlock_bh(&p->tcf_lock);
94
95 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(pc, &csum_hash_info);
97
98 return ret;
99}
100
101static int tcf_csum_cleanup(struct tc_action *a, int bind)
102{
103 struct tcf_csum *p = a->priv;
104 return tcf_hash_release(&p->common, bind, &csum_hash_info);
105}
106
107/**
108 * tcf_csum_skb_nextlayer - Get next layer pointer
109 * @skb: sk_buff to use
110 * @ihl: previous summed headers length
111 * @ipl: complete packet length
112 * @jhl: next header length
113 *
114 * Check that the expected next layer is available in the specified sk_buff.
115 * Return the next layer pointer if the checks pass, NULL otherwise.
116 */
117static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
118 unsigned int ihl, unsigned int ipl,
119 unsigned int jhl)
120{
121 int ntkoff = skb_network_offset(skb);
122 int hl = ihl + jhl;
123
124 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
125 (skb_cloned(skb) &&
126 !skb_clone_writable(skb, hl + ntkoff) &&
127 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
128 return NULL;
129 else
130 return (void *)(skb_network_header(skb) + ihl);
131}
132
133static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
134 unsigned int ihl, unsigned int ipl)
135{
136 struct icmphdr *icmph;
137
138 icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
139 if (icmph == NULL)
140 return 0;
141
142 icmph->checksum = 0;
143 skb->csum = csum_partial(icmph, ipl - ihl, 0);
144 icmph->checksum = csum_fold(skb->csum);
145
146 skb->ip_summed = CHECKSUM_NONE;
147
148 return 1;
149}
150
151static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
152 unsigned int ihl, unsigned int ipl)
153{
154 struct igmphdr *igmph;
155
156 igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
157 if (igmph == NULL)
158 return 0;
159
160 igmph->csum = 0;
161 skb->csum = csum_partial(igmph, ipl - ihl, 0);
162 igmph->csum = csum_fold(skb->csum);
163
164 skb->ip_summed = CHECKSUM_NONE;
165
166 return 1;
167}
168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
170 unsigned int ihl, unsigned int ipl)
171{
172 struct icmp6hdr *icmp6h;
173
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL)
176 return 0;
177
178 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
181 ipl - ihl, IPPROTO_ICMPV6,
182 skb->csum);
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 return 1;
187}
188
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
190 unsigned int ihl, unsigned int ipl)
191{
192 struct tcphdr *tcph;
193
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL)
196 return 0;
197
198 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl,
201 iph->saddr, iph->daddr, skb->csum);
202
203 skb->ip_summed = CHECKSUM_NONE;
204
205 return 1;
206}
207
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
209 unsigned int ihl, unsigned int ipl)
210{
211 struct tcphdr *tcph;
212
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL)
215 return 0;
216
217 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
220 ipl - ihl, IPPROTO_TCP,
221 skb->csum);
222
223 skb->ip_summed = CHECKSUM_NONE;
224
225 return 1;
226}
227
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
229 unsigned int ihl, unsigned int ipl, int udplite)
230{
231 struct udphdr *udph;
232 u16 ul;
233
234 /*
235	 * Support both UDP and UDPLITE checksum algorithms. Don't use
236	 * udph->len to get the real length without a protocol check;
237	 * UDPLITE uses udph->len for something else.
238	 * Use iph->tot_len, or just ipl.
239 */
240
241 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
242 if (udph == NULL)
243 return 0;
244
245 ul = ntohs(udph->len);
246
247 if (udplite || udph->check) {
248
249 udph->check = 0;
250
251 if (udplite) {
252 if (ul == 0)
253 skb->csum = csum_partial(udph, ipl - ihl, 0);
254 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
255 skb->csum = csum_partial(udph, ul, 0);
256 else
257 goto ignore_obscure_skb;
258 } else {
259 if (ul != ipl - ihl)
260 goto ignore_obscure_skb;
261
262 skb->csum = csum_partial(udph, ul, 0);
263 }
264
265 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
266 ul, iph->protocol,
267 skb->csum);
268
269 if (!udph->check)
270 udph->check = CSUM_MANGLED_0;
271 }
272
273 skb->ip_summed = CHECKSUM_NONE;
274
275ignore_obscure_skb:
276 return 1;
277}
278
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
280 unsigned int ihl, unsigned int ipl, int udplite)
281{
282 struct udphdr *udph;
283 u16 ul;
284
285 /*
286	 * Support both UDP and UDPLITE checksum algorithms. Don't use
287	 * udph->len to get the real length without a protocol check;
288	 * UDPLITE uses udph->len for something else.
289	 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
290 */
291
292 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
293 if (udph == NULL)
294 return 0;
295
296 ul = ntohs(udph->len);
297
298 udph->check = 0;
299
300 if (udplite) {
301 if (ul == 0)
302 skb->csum = csum_partial(udph, ipl - ihl, 0);
303
304 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
305 skb->csum = csum_partial(udph, ul, 0);
306
307 else
308 goto ignore_obscure_skb;
309 } else {
310 if (ul != ipl - ihl)
311 goto ignore_obscure_skb;
312
313 skb->csum = csum_partial(udph, ul, 0);
314 }
315
316 udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
317 udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
318 skb->csum);
319
320 if (!udph->check)
321 udph->check = CSUM_MANGLED_0;
322
323 skb->ip_summed = CHECKSUM_NONE;
324
325ignore_obscure_skb:
326 return 1;
327}
328
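
The transport helpers above all follow one pattern: zero the checksum field, csum_partial() over the remaining bytes, then fold the 32-bit accumulator down to 16 bits (csum_fold(), or the *_magic() variants that first mix in the pseudo-header of addresses, protocol, and length) and write the result back. What they compute is the one's-complement sum of RFC 1071; a stand-alone user-space sketch, with illustrative names rather than the kernel API:

#include <stddef.h>
#include <stdint.h>

static uint16_t rfc1071_csum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {		/* accumulate 16-bit words */
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte */
		sum += (uint32_t)p[0] << 8;

	while (sum >> 16)		/* fold the carries (csum_fold) */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;		/* one's complement of the sum */
}
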
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{
331 struct iphdr *iph;
332 int ntkoff;
333
334 ntkoff = skb_network_offset(skb);
335
336 if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
337 goto fail;
338
339 iph = ip_hdr(skb);
340
341 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
342 case IPPROTO_ICMP:
343 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
344 if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
345 ntohs(iph->tot_len)))
346 goto fail;
347 break;
348 case IPPROTO_IGMP:
349 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
350 if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
351 ntohs(iph->tot_len)))
352 goto fail;
353 break;
354 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
357 ntohs(iph->tot_len)))
358 goto fail;
359 break;
360 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
363 ntohs(iph->tot_len), 0))
364 goto fail;
365 break;
366 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
369 ntohs(iph->tot_len), 1))
370 goto fail;
371 break;
372 }
373
374 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
375 if (skb_cloned(skb) &&
376 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail;
379
380 ip_send_check(iph);
381 }
382
383 return 1;
384
385fail:
386 return 0;
387}
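
Note the dispatch expression in the switch above: a non-first fragment (nonzero fragment offset) carries no transport header, so the switched-on value collapses to 0, none of the per-protocol cases match, and only the TCA_CSUM_UPDATE_FLAG_IPV4HDR branch can still run. A userspace sketch of the idiom:

    #include <stdint.h>
    #include <arpa/inet.h>

    #define IP_OFFSET 0x1FFF	/* 13-bit fragment offset mask, as in <net/ip.h> */

    /* frag_off is in network byte order, exactly as read from the header */
    static int csum_dispatch(uint16_t frag_off, uint8_t protocol)
    {
    	return (frag_off & htons(IP_OFFSET)) ? 0 : protocol;
    }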
388
389static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
390 unsigned int ixhl, unsigned int *pl)
391{
392 int off, len, optlen;
393 unsigned char *xh = (void *)ip6xh;
394
395 off = sizeof(*ip6xh);
396 len = ixhl - off;
397
398 while (len > 1) {
399 switch (xh[off]) {
400 case IPV6_TLV_PAD0:
401 optlen = 1;
402 break;
403 case IPV6_TLV_JUMBO:
404 optlen = xh[off + 1] + 2;
405 if (optlen != 6 || len < 6 || (off & 3) != 2)
406 /* wrong jumbo option length/alignment */
407 return 0;
408 *pl = ntohl(*(__be32 *)(xh + off + 2));
409 goto done;
410 default:
411 optlen = xh[off + 1] + 2;
412 if (optlen > len)
413 /* ignore obscure options */
414 goto done;
415 break;
416 }
417 off += optlen;
418 len -= optlen;
419 }
420
421done:
422 return 1;
423}
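
tcf_csum_ipv6_hopopts() walks the hop-by-hop header as type/length/value options, looking for the Jumbo Payload option (RFC 2675), whose 32-bit length replaces the 16-bit payload_len when that field is zero. Pad1 has no length byte, and the jumbo option must be exactly 6 bytes long and start at a 4n+2 offset. A standalone sketch of the same walk (userspace, illustrative only):

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>
    #include <arpa/inet.h>

    #define TLV_PAD1  0x00
    #define TLV_JUMBO 0xC2

    /* returns 1 and sets *plen if a valid jumbo option is found,
     * 0 if none (or an obscure option ends the walk), -1 if malformed */
    static int find_jumbo_len(const uint8_t *xh, size_t xhlen, uint32_t *plen)
    {
    	size_t off = 2;			/* skip nexthdr and hdrlen bytes */

    	while (off + 1 < xhlen) {
    		size_t optlen;

    		if (xh[off] == TLV_PAD1) {
    			off++;		/* Pad1 is a lone type byte */
    			continue;
    		}
    		optlen = 2 + xh[off + 1];	/* type + length + data */
    		if (off + optlen > xhlen)
    			return 0;	/* truncated option: ignore the rest */
    		if (xh[off] == TLV_JUMBO) {
    			uint32_t be;

    			if (optlen != 6 || (off & 3) != 2)
    				return -1;	/* bad length/alignment */
    			memcpy(&be, xh + off + 2, sizeof(be));
    			*plen = ntohl(be);
    			return 1;
    		}
    		off += optlen;
    	}
    	return 0;
    }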
424
425static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
426{
427 struct ipv6hdr *ip6h;
428 struct ipv6_opt_hdr *ip6xh;
429 unsigned int hl, ixhl;
430 unsigned int pl;
431 int ntkoff;
432 u8 nexthdr;
433
434 ntkoff = skb_network_offset(skb);
435
436 hl = sizeof(*ip6h);
437
438 if (!pskb_may_pull(skb, hl + ntkoff))
439 goto fail;
440
441 ip6h = ipv6_hdr(skb);
442
443 pl = ntohs(ip6h->payload_len);
444 nexthdr = ip6h->nexthdr;
445
446 do {
447 switch (nexthdr) {
448 case NEXTHDR_FRAGMENT:
449 goto ignore_skb;
450 case NEXTHDR_ROUTING:
451 case NEXTHDR_HOP:
452 case NEXTHDR_DEST:
453 if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
454 goto fail;
455 ip6xh = (void *)(skb_network_header(skb) + hl);
456 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail;
459 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail;
462 nexthdr = ip6xh->nexthdr;
463 hl += ixhl;
464 break;
465 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h,
468 hl, pl + sizeof(*ip6h)))
469 goto fail;
470 goto done;
471 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h,
474 hl, pl + sizeof(*ip6h)))
475 goto fail;
476 goto done;
477 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
480 pl + sizeof(*ip6h), 0))
481 goto fail;
482 goto done;
483 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
486 pl + sizeof(*ip6h), 1))
487 goto fail;
488 goto done;
489 default:
490 goto ignore_skb;
491 }
492 } while (pskb_may_pull(skb, hl + 1 + ntkoff));
493
494done:
495ignore_skb:
496 return 1;
497
498fail:
499 return 0;
500}
501
502static int tcf_csum(struct sk_buff *skb,
503 struct tc_action *a, struct tcf_result *res)
504{
505 struct tcf_csum *p = a->priv;
506 int action;
507 u32 update_flags;
508
509 spin_lock(&p->tcf_lock);
510 p->tcf_tm.lastuse = jiffies;
511 p->tcf_bstats.bytes += qdisc_pkt_len(skb);
512 p->tcf_bstats.packets++;
513 action = p->tcf_action;
514 update_flags = p->update_flags;
515 spin_unlock(&p->tcf_lock);
516
517 if (unlikely(action == TC_ACT_SHOT))
518 goto drop;
519
520 switch (skb->protocol) {
521 case cpu_to_be16(ETH_P_IP):
522 if (!tcf_csum_ipv4(skb, update_flags))
523 goto drop;
524 break;
525 case cpu_to_be16(ETH_P_IPV6):
526 if (!tcf_csum_ipv6(skb, update_flags))
527 goto drop;
528 break;
529 }
530
531 return action;
532
533drop:
534 spin_lock(&p->tcf_lock);
535 p->tcf_qstats.drops++;
536 spin_unlock(&p->tcf_lock);
537 return TC_ACT_SHOT;
538}
539
540static int tcf_csum_dump(struct sk_buff *skb,
541 struct tc_action *a, int bind, int ref)
542{
543 unsigned char *b = skb_tail_pointer(skb);
544 struct tcf_csum *p = a->priv;
545 struct tc_csum opt = {
546 .update_flags = p->update_flags,
547 .index = p->tcf_index,
548 .action = p->tcf_action,
549 .refcnt = p->tcf_refcnt - ref,
550 .bindcnt = p->tcf_bindcnt - bind,
551 };
552 struct tcf_t t;
553
554 NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
555 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
556 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
557 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
558 NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
559
560 return skb->len;
561
562nla_put_failure:
563 nlmsg_trim(skb, b);
564 return -1;
565}
566
567static struct tc_action_ops act_csum_ops = {
568 .kind = "csum",
569 .hinfo = &csum_hash_info,
570 .type = TCA_ACT_CSUM,
571 .capab = TCA_CAP_NONE,
572 .owner = THIS_MODULE,
573 .act = tcf_csum,
574 .dump = tcf_csum_dump,
575 .cleanup = tcf_csum_cleanup,
576 .lookup = tcf_hash_search,
577 .init = tcf_csum_init,
578 .walk = tcf_generic_walker
579};
580
581MODULE_DESCRIPTION("Checksum updating actions");
582MODULE_LICENSE("GPL");
583
584static int __init csum_init_module(void)
585{
586 return tcf_register_action(&act_csum_ops);
587}
588
589static void __exit csum_cleanup_module(void)
590{
591 tcf_unregister_action(&act_csum_ops);
592}
593
594module_init(csum_init_module);
595module_exit(csum_cleanup_module);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c7e59e6ec349..8daef9632255 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -39,7 +39,7 @@ static struct tcf_hashinfo ipt_hash_info = {
39 .lock = &ipt_lock, 39 .lock = &ipt_lock,
40}; 40};
41 41
42static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) 42static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
43{ 43{
44 struct xt_tgchk_param par; 44 struct xt_tgchk_param par;
45 struct xt_target *target; 45 struct xt_target *target;
@@ -66,7 +66,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
66 return 0; 66 return 0;
67} 67}
68 68
69static void ipt_destroy_target(struct ipt_entry_target *t) 69static void ipt_destroy_target(struct xt_entry_target *t)
70{ 70{
71 struct xt_tgdtor_param par = { 71 struct xt_tgdtor_param par = {
72 .target = t->u.kernel.target, 72 .target = t->u.kernel.target,
@@ -99,7 +99,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, 99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
100 [TCA_IPT_HOOK] = { .type = NLA_U32 }, 100 [TCA_IPT_HOOK] = { .type = NLA_U32 },
101 [TCA_IPT_INDEX] = { .type = NLA_U32 }, 101 [TCA_IPT_INDEX] = { .type = NLA_U32 },
102 [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) }, 102 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
103}; 103};
104 104
105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, 105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
@@ -108,7 +108,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
108 struct nlattr *tb[TCA_IPT_MAX + 1]; 108 struct nlattr *tb[TCA_IPT_MAX + 1];
109 struct tcf_ipt *ipt; 109 struct tcf_ipt *ipt;
110 struct tcf_common *pc; 110 struct tcf_common *pc;
111 struct ipt_entry_target *td, *t; 111 struct xt_entry_target *td, *t;
112 char *tname; 112 char *tname;
113 int ret = 0, err; 113 int ret = 0, err;
114 u32 hook = 0; 114 u32 hook = 0;
@@ -126,7 +126,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
126 if (tb[TCA_IPT_TARG] == NULL) 126 if (tb[TCA_IPT_TARG] == NULL)
127 return -EINVAL; 127 return -EINVAL;
128 128
129 td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]); 129 td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) 130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
131 return -EINVAL; 131 return -EINVAL;
132 132
@@ -230,7 +230,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
230 result = TC_ACT_SHOT; 230 result = TC_ACT_SHOT;
231 ipt->tcf_qstats.drops++; 231 ipt->tcf_qstats.drops++;
232 break; 232 break;
233 case IPT_CONTINUE: 233 case XT_CONTINUE:
234 result = TC_ACT_PIPE; 234 result = TC_ACT_PIPE;
235 break; 235 break;
236 default: 236 default:
@@ -249,7 +249,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
249{ 249{
250 unsigned char *b = skb_tail_pointer(skb); 250 unsigned char *b = skb_tail_pointer(skb);
251 struct tcf_ipt *ipt = a->priv; 251 struct tcf_ipt *ipt = a->priv;
252 struct ipt_entry_target *t; 252 struct xt_entry_target *t;
253 struct tcf_t tm; 253 struct tcf_t tm;
254 struct tc_cnt c; 254 struct tc_cnt c;
255 255
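
The ipt_entry_target/IPT_CONTINUE to xt_entry_target/XT_CONTINUE changes in this diff are purely mechanical: since the xtables consolidation the ipt_* spellings have survived only as compatibility aliases, roughly as below (hedged reconstruction of the ip_tables.h defines of this era, for illustration):

    #define ipt_entry_target	xt_entry_target
    #define IPT_CONTINUE		XT_CONTINUE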
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd050..f23d9155b1ef 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -268,6 +268,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
268 goto nla_put_failure; 268 goto nla_put_failure;
269 269
270 nla_nest_end(skb, nest); 270 nla_nest_end(skb, nest);
271
272 if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
273 goto nla_put_failure;
274
271 return skb->len; 275 return skb->len;
272 276
273nla_put_failure: 277nla_put_failure:
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 37dff78e9cb1..d49c40fb7e09 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
34 .populate = cgrp_populate, 34 .populate = cgrp_populate,
35#ifdef CONFIG_NET_CLS_CGROUP 35#ifdef CONFIG_NET_CLS_CGROUP
36 .subsys_id = net_cls_subsys_id, 36 .subsys_id = net_cls_subsys_id,
37#else
38#define net_cls_subsys_id net_cls_subsys.subsys_id
39#endif 37#endif
40 .module = THIS_MODULE, 38 .module = THIS_MODULE,
41}; 39};
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913c..5b271a18bc3a 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
111 } 111 }
112} 112}
113 113
114static int has_ports(u8 protocol)
115{
116 switch (protocol) {
117 case IPPROTO_TCP:
118 case IPPROTO_UDP:
119 case IPPROTO_UDPLITE:
120 case IPPROTO_SCTP:
121 case IPPROTO_DCCP:
122 case IPPROTO_ESP:
123 return 1;
124 default:
125 return 0;
126 }
127}
128
129static u32 flow_get_proto_src(struct sk_buff *skb) 114static u32 flow_get_proto_src(struct sk_buff *skb)
130{ 115{
131 switch (skb->protocol) { 116 switch (skb->protocol) {
132 case htons(ETH_P_IP): { 117 case htons(ETH_P_IP): {
133 struct iphdr *iph; 118 struct iphdr *iph;
119 int poff;
134 120
135 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break; 122 break;
137 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 124 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
139 has_ports(iph->protocol) && 125 break;
140 pskb_network_may_pull(skb, iph->ihl * 4 + 2)) 126 poff = proto_ports_offset(iph->protocol);
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 127 if (poff >= 0 &&
128 pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
129 iph = ip_hdr(skb);
130 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
131 poff));
132 }
142 break; 133 break;
143 } 134 }
144 case htons(ETH_P_IPV6): { 135 case htons(ETH_P_IPV6): {
145 struct ipv6hdr *iph; 136 struct ipv6hdr *iph;
137 int poff;
146 138
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) 139 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 break; 140 break;
149 iph = ipv6_hdr(skb); 141 iph = ipv6_hdr(skb);
150 if (has_ports(iph->nexthdr)) 142 poff = proto_ports_offset(iph->nexthdr);
151 return ntohs(*(__be16 *)&iph[1]); 143 if (poff >= 0 &&
144 pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
145 iph = ipv6_hdr(skb);
146 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
147 poff));
148 }
152 break; 149 break;
153 } 150 }
154 } 151 }
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
161 switch (skb->protocol) { 158 switch (skb->protocol) {
162 case htons(ETH_P_IP): { 159 case htons(ETH_P_IP): {
163 struct iphdr *iph; 160 struct iphdr *iph;
161 int poff;
164 162
165 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break; 164 break;
167 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 166 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
169 has_ports(iph->protocol) && 167 break;
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 168 poff = proto_ports_offset(iph->protocol);
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 169 if (poff >= 0 &&
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
171 iph = ip_hdr(skb);
172 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
173 2 + poff));
174 }
172 break; 175 break;
173 } 176 }
174 case htons(ETH_P_IPV6): { 177 case htons(ETH_P_IPV6): {
175 struct ipv6hdr *iph; 178 struct ipv6hdr *iph;
179 int poff;
176 180
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) 181 if (!pskb_network_may_pull(skb, sizeof(*iph)))
178 break; 182 break;
179 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
180 if (has_ports(iph->nexthdr)) 184 poff = proto_ports_offset(iph->nexthdr);
181 return ntohs(*(__be16 *)((void *)&iph[1] + 2)); 185 if (poff >= 0 &&
186 pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
187 iph = ipv6_hdr(skb);
188 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
189 poff + 2));
190 }
182 break; 191 break;
183 } 192 }
184 } 193 }
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
297 return tag & VLAN_VID_MASK; 306 return tag & VLAN_VID_MASK;
298} 307}
299 308
309static u32 flow_get_rxhash(struct sk_buff *skb)
310{
311 return skb_get_rxhash(skb);
312}
313
300static u32 flow_key_get(struct sk_buff *skb, int key) 314static u32 flow_key_get(struct sk_buff *skb, int key)
301{ 315{
302 switch (key) { 316 switch (key) {
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
334 return flow_get_skgid(skb); 348 return flow_get_skgid(skb);
335 case FLOW_KEY_VLAN_TAG: 349 case FLOW_KEY_VLAN_TAG:
336 return flow_get_vlan_tag(skb); 350 return flow_get_vlan_tag(skb);
351 case FLOW_KEY_RXHASH:
352 return flow_get_rxhash(skb);
337 default: 353 default:
338 WARN_ON(1); 354 WARN_ON(1);
339 return 0; 355 return 0;
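
The rewritten port extraction replaces the open-coded has_ports() list with proto_ports_offset(), which reports where the 16-bit port pair (or SPI) sits inside the transport header, or a negative value when the protocol carries none -- hence the new poff >= 0 guards. A hedged reconstruction of the helper as it appeared in <net/ip.h> at this point (treat as a sketch, not authoritative):

    #include <errno.h>
    #include <netinet/in.h>

    static inline int proto_ports_offset(int proto)
    {
    	switch (proto) {
    	case IPPROTO_TCP:
    	case IPPROTO_UDP:
    	case IPPROTO_UDPLITE:
    	case IPPROTO_SCTP:
    	case IPPROTO_DCCP:
    	case IPPROTO_ESP:	/* the SPI stands in for ports */
    		return 0;
    	case IPPROTO_AH:	/* SPI sits 4 bytes into the header */
    		return 4;
    	default:
    		return -EINVAL;
    	}
    }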
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3bcac8aa333c..34da5e29ea1a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen)
223 dst->value = skb->mac_len; 223 dst->value = skb->mac_len;
224} 224}
225 225
226META_COLLECTOR(int_rxhash)
227{
228 dst->value = skb_get_rxhash(skb);
229}
230
226/************************************************************************** 231/**************************************************************************
227 * Netfilter 232 * Netfilter
228 **************************************************************************/ 233 **************************************************************************/
@@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
541 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), 546 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
542 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), 547 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
543 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), 548 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
549 [META_ID(RXHASH)] = META_FUNC(int_rxhash),
544 } 550 }
545}; 551};
546 552
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 763253257411..ea8f566e720c 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -103,7 +103,8 @@ retry:
103 103
104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) 104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
105{ 105{
106 textsearch_destroy(EM_TEXT_PRIV(m)->config); 106 if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
107 textsearch_destroy(EM_TEXT_PRIV(m)->config);
107} 108}
108 109
109static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) 110static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086aa..b22ca2d1cebc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -240,7 +240,10 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
240 if (q) 240 if (q)
241 goto out; 241 goto out;
242 242
243 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); 243 if (dev_ingress_queue(dev))
244 q = qdisc_match_from_root(
245 dev_ingress_queue(dev)->qdisc_sleeping,
246 handle);
244out: 247out:
245 return q; 248 return q;
246} 249}
@@ -360,7 +363,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 363 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
361 } 364 }
362 365
363 if (!s || tsize != s->tsize || (!tab && tsize > 0)) 366 if (tsize != s->tsize || (!tab && tsize > 0))
364 return ERR_PTR(-EINVAL); 367 return ERR_PTR(-EINVAL);
365 368
366 spin_lock(&qdisc_stab_lock); 369 spin_lock(&qdisc_stab_lock);
@@ -690,6 +693,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
690 (new && new->flags & TCQ_F_INGRESS)) { 693 (new && new->flags & TCQ_F_INGRESS)) {
691 num_q = 1; 694 num_q = 1;
692 ingress = 1; 695 ingress = 1;
696 if (!dev_ingress_queue(dev))
697 return -ENOENT;
693 } 698 }
694 699
695 if (dev->flags & IFF_UP) 700 if (dev->flags & IFF_UP)
@@ -701,7 +706,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
701 } 706 }
702 707
703 for (i = 0; i < num_q; i++) { 708 for (i = 0; i < num_q; i++) {
704 struct netdev_queue *dev_queue = &dev->rx_queue; 709 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
705 710
706 if (!ingress) 711 if (!ingress)
707 dev_queue = netdev_get_tx_queue(dev, i); 712 dev_queue = netdev_get_tx_queue(dev, i);
@@ -979,7 +984,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
979 return -ENOENT; 984 return -ENOENT;
980 q = qdisc_leaf(p, clid); 985 q = qdisc_leaf(p, clid);
981 } else { /* ingress */ 986 } else { /* ingress */
982 q = dev->rx_queue.qdisc_sleeping; 987 if (dev_ingress_queue(dev))
988 q = dev_ingress_queue(dev)->qdisc_sleeping;
983 } 989 }
984 } else { 990 } else {
985 q = dev->qdisc; 991 q = dev->qdisc;
@@ -1043,8 +1049,9 @@ replay:
1043 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 1049 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1044 return -ENOENT; 1050 return -ENOENT;
1045 q = qdisc_leaf(p, clid); 1051 q = qdisc_leaf(p, clid);
1046 } else { /*ingress */ 1052 } else { /* ingress */
1047 q = dev->rx_queue.qdisc_sleeping; 1053 if (dev_ingress_queue_create(dev))
1054 q = dev_ingress_queue(dev)->qdisc_sleeping;
1048 } 1055 }
1049 } else { 1056 } else {
1050 q = dev->qdisc; 1057 q = dev->qdisc;
@@ -1123,11 +1130,14 @@ replay:
1123create_n_graft: 1130create_n_graft:
1124 if (!(n->nlmsg_flags&NLM_F_CREATE)) 1131 if (!(n->nlmsg_flags&NLM_F_CREATE))
1125 return -ENOENT; 1132 return -ENOENT;
1126 if (clid == TC_H_INGRESS) 1133 if (clid == TC_H_INGRESS) {
1127 q = qdisc_create(dev, &dev->rx_queue, p, 1134 if (dev_ingress_queue(dev))
1128 tcm->tcm_parent, tcm->tcm_parent, 1135 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1129 tca, &err); 1136 tcm->tcm_parent, tcm->tcm_parent,
1130 else { 1137 tca, &err);
1138 else
1139 err = -ENOENT;
1140 } else {
1131 struct netdev_queue *dev_queue; 1141 struct netdev_queue *dev_queue;
1132 1142
1133 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) 1143 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
@@ -1304,8 +1314,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1304 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) 1314 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1305 goto done; 1315 goto done;
1306 1316
1307 dev_queue = &dev->rx_queue; 1317 dev_queue = dev_ingress_queue(dev);
1308 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) 1318 if (dev_queue &&
1319 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1320 &q_idx, s_q_idx) < 0)
1309 goto done; 1321 goto done;
1310 1322
1311cont: 1323cont:
@@ -1595,8 +1607,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1595 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) 1607 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1596 goto done; 1608 goto done;
1597 1609
1598 dev_queue = &dev->rx_queue; 1610 dev_queue = dev_ingress_queue(dev);
1599 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) 1611 if (dev_queue &&
1612 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1613 &t, s_t) < 0)
1600 goto done; 1614 goto done;
1601 1615
1602done: 1616done:
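
Every hunk in this file follows the same pattern: dev->rx_queue used to be embedded in struct net_device, but the ingress queue is now allocated on demand, so dev_ingress_queue() can return NULL and callers must check it (or call dev_ingress_queue_create() where creating it is appropriate, as in the replay path). A hedged sketch of the accessor this code assumes, reconstructed from the netdevice.h of this series:

    static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
    {
    	/* RTNL-protected RCU pointer: valid to dereference under rtnl_lock */
    	return rtnl_dereference(dev->ingress_queue);
    }

    /* allocates and initializes the queue on first use; NULL on OOM */
    struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);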
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6318e1136b83..282540778aa8 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -275,8 +275,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
275 goto err_out; 275 goto err_out;
276 } 276 }
277 flow->filter_list = NULL; 277 flow->filter_list = NULL;
278 flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 278 flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
279 &pfifo_qdisc_ops, classid);
280 if (!flow->q) 279 if (!flow->q)
281 flow->q = &noop_qdisc; 280 flow->q = &noop_qdisc;
282 pr_debug("atm_tc_change: qdisc %p\n", flow->q); 281 pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -543,7 +542,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
543 INIT_LIST_HEAD(&p->flows); 542 INIT_LIST_HEAD(&p->flows);
544 INIT_LIST_HEAD(&p->link.list); 543 INIT_LIST_HEAD(&p->link.list);
545 list_add(&p->link.list, &p->flows); 544 list_add(&p->link.list, &p->flows);
546 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 545 p->link.q = qdisc_create_dflt(sch->dev_queue,
547 &pfifo_qdisc_ops, sch->handle); 546 &pfifo_qdisc_ops, sch->handle);
548 if (!p->link.q) 547 if (!p->link.q)
549 p->link.q = &noop_qdisc; 548 p->link.q = &noop_qdisc;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 28c01ef5abc8..eb7631590865 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1379,9 +1379,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1379 q->link.sibling = &q->link; 1379 q->link.sibling = &q->link;
1380 q->link.common.classid = sch->handle; 1380 q->link.common.classid = sch->handle;
1381 q->link.qdisc = sch; 1381 q->link.qdisc = sch;
1382 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1382 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1383 &pfifo_qdisc_ops, 1383 sch->handle);
1384 sch->handle))) 1384 if (!q->link.q)
1385 q->link.q = &noop_qdisc; 1385 q->link.q = &noop_qdisc;
1386 1386
1387 q->link.priority = TC_CBQ_MAXPRIO-1; 1387 q->link.priority = TC_CBQ_MAXPRIO-1;
@@ -1623,7 +1623,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1623 struct cbq_class *cl = (struct cbq_class*)arg; 1623 struct cbq_class *cl = (struct cbq_class*)arg;
1624 1624
1625 if (new == NULL) { 1625 if (new == NULL) {
1626 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1626 new = qdisc_create_dflt(sch->dev_queue,
1627 &pfifo_qdisc_ops, cl->common.classid); 1627 &pfifo_qdisc_ops, cl->common.classid);
1628 if (new == NULL) 1628 if (new == NULL)
1629 return -ENOBUFS; 1629 return -ENOBUFS;
@@ -1874,8 +1874,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1874 cl->R_tab = rtab; 1874 cl->R_tab = rtab;
1875 rtab = NULL; 1875 rtab = NULL;
1876 cl->refcnt = 1; 1876 cl->refcnt = 1;
1877 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1877 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1878 &pfifo_qdisc_ops, classid))) 1878 if (!cl->q)
1879 cl->q = &noop_qdisc; 1879 cl->q = &noop_qdisc;
1880 cl->common.classid = classid; 1880 cl->common.classid = classid;
1881 cl->tparent = parent; 1881 cl->tparent = parent;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index b74046a95397..aa8b5313f8cf 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -110,7 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
110 cl->refcnt = 1; 110 cl->refcnt = 1;
111 cl->common.classid = classid; 111 cl->common.classid = classid;
112 cl->quantum = quantum; 112 cl->quantum = quantum;
113 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 113 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
114 &pfifo_qdisc_ops, classid); 114 &pfifo_qdisc_ops, classid);
115 if (cl->qdisc == NULL) 115 if (cl->qdisc == NULL)
116 cl->qdisc = &noop_qdisc; 116 cl->qdisc = &noop_qdisc;
@@ -218,7 +218,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
218 struct drr_class *cl = (struct drr_class *)arg; 218 struct drr_class *cl = (struct drr_class *)arg;
219 219
220 if (new == NULL) { 220 if (new == NULL) {
221 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 221 new = qdisc_create_dflt(sch->dev_queue,
222 &pfifo_qdisc_ops, cl->common.classid); 222 &pfifo_qdisc_ops, cl->common.classid);
223 if (new == NULL) 223 if (new == NULL)
224 new = &noop_qdisc; 224 new = &noop_qdisc;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 63d41f86679c..1d295d62bb5c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -61,8 +61,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
61 sch, p, new, old); 61 sch, p, new, old);
62 62
63 if (new == NULL) { 63 if (new == NULL) {
64 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 64 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
65 &pfifo_qdisc_ops,
66 sch->handle); 65 sch->handle);
67 if (new == NULL) 66 if (new == NULL)
68 new = &noop_qdisc; 67 new = &noop_qdisc;
@@ -384,8 +383,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
384 p->default_index = default_index; 383 p->default_index = default_index;
385 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); 384 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
386 385
387 p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 386 p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
388 &pfifo_qdisc_ops, sch->handle);
389 if (p->q == NULL) 387 if (p->q == NULL)
390 p->q = &noop_qdisc; 388 p->q = &noop_qdisc;
391 389
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 5948bafa8ce2..4dfecb0cba37 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -172,8 +172,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
172 struct Qdisc *q; 172 struct Qdisc *q;
173 int err = -ENOMEM; 173 int err = -ENOMEM;
174 174
175 q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 175 q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1));
176 ops, TC_H_MAKE(sch->handle, 1));
177 if (q) { 176 if (q) {
178 err = fifo_set_limit(q, limit); 177 err = fifo_set_limit(q, limit);
179 if (err < 0) { 178 if (err < 0) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2aeb3a4386a1..5dbb3cd96e59 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -383,6 +383,7 @@ struct Qdisc noop_qdisc = {
383 .list = LIST_HEAD_INIT(noop_qdisc.list), 383 .list = LIST_HEAD_INIT(noop_qdisc.list),
384 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), 384 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
385 .dev_queue = &noop_netdev_queue, 385 .dev_queue = &noop_netdev_queue,
386 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
386}; 387};
387EXPORT_SYMBOL(noop_qdisc); 388EXPORT_SYMBOL(noop_qdisc);
388 389
@@ -409,6 +410,7 @@ static struct Qdisc noqueue_qdisc = {
409 .list = LIST_HEAD_INIT(noqueue_qdisc.list), 410 .list = LIST_HEAD_INIT(noqueue_qdisc.list),
410 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), 411 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
411 .dev_queue = &noqueue_netdev_queue, 412 .dev_queue = &noqueue_netdev_queue,
413 .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
412}; 414};
413 415
414 416
@@ -574,10 +576,8 @@ errout:
574 return ERR_PTR(err); 576 return ERR_PTR(err);
575} 577}
576 578
577struct Qdisc * qdisc_create_dflt(struct net_device *dev, 579struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
578 struct netdev_queue *dev_queue, 580 struct Qdisc_ops *ops, unsigned int parentid)
579 struct Qdisc_ops *ops,
580 unsigned int parentid)
581{ 581{
582 struct Qdisc *sch; 582 struct Qdisc *sch;
583 583
@@ -682,7 +682,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
682 struct Qdisc *qdisc; 682 struct Qdisc *qdisc;
683 683
684 if (dev->tx_queue_len) { 684 if (dev->tx_queue_len) {
685 qdisc = qdisc_create_dflt(dev, dev_queue, 685 qdisc = qdisc_create_dflt(dev_queue,
686 &pfifo_fast_ops, TC_H_ROOT); 686 &pfifo_fast_ops, TC_H_ROOT);
687 if (!qdisc) { 687 if (!qdisc) {
688 printk(KERN_INFO "%s: activation failed\n", dev->name); 688 printk(KERN_INFO "%s: activation failed\n", dev->name);
@@ -709,7 +709,7 @@ static void attach_default_qdiscs(struct net_device *dev)
709 dev->qdisc = txq->qdisc_sleeping; 709 dev->qdisc = txq->qdisc_sleeping;
710 atomic_inc(&dev->qdisc->refcnt); 710 atomic_inc(&dev->qdisc->refcnt);
711 } else { 711 } else {
712 qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); 712 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
713 if (qdisc) { 713 if (qdisc) {
714 qdisc->ops->attach(qdisc); 714 qdisc->ops->attach(qdisc);
715 dev->qdisc = qdisc; 715 dev->qdisc = qdisc;
@@ -753,7 +753,8 @@ void dev_activate(struct net_device *dev)
753 753
754 need_watchdog = 0; 754 need_watchdog = 0;
755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); 755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
756 transition_one_qdisc(dev, &dev->rx_queue, NULL); 756 if (dev_ingress_queue(dev))
757 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
757 758
758 if (need_watchdog) { 759 if (need_watchdog) {
759 dev->trans_start = jiffies; 760 dev->trans_start = jiffies;
@@ -812,7 +813,8 @@ static bool some_qdisc_is_busy(struct net_device *dev)
812void dev_deactivate(struct net_device *dev) 813void dev_deactivate(struct net_device *dev)
813{ 814{
814 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); 815 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
815 dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); 816 if (dev_ingress_queue(dev))
817 dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
816 818
817 dev_watchdog_down(dev); 819 dev_watchdog_down(dev);
818 820
@@ -838,7 +840,8 @@ void dev_init_scheduler(struct net_device *dev)
838{ 840{
839 dev->qdisc = &noop_qdisc; 841 dev->qdisc = &noop_qdisc;
840 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); 842 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
841 dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 843 if (dev_ingress_queue(dev))
844 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
842 845
843 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); 846 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
844} 847}
@@ -861,7 +864,8 @@ static void shutdown_scheduler_queue(struct net_device *dev,
861void dev_shutdown(struct net_device *dev) 864void dev_shutdown(struct net_device *dev)
862{ 865{
863 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); 866 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
864 shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 867 if (dev_ingress_queue(dev))
868 shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
865 qdisc_destroy(dev->qdisc); 869 qdisc_destroy(dev->qdisc);
866 dev->qdisc = &noop_qdisc; 870 dev->qdisc = &noop_qdisc;
867 871
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 47496098d35c..069c62b7bb36 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1088,7 +1088,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1088 cl->refcnt = 1; 1088 cl->refcnt = 1;
1089 cl->sched = q; 1089 cl->sched = q;
1090 cl->cl_parent = parent; 1090 cl->cl_parent = parent;
1091 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1091 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
1092 &pfifo_qdisc_ops, classid); 1092 &pfifo_qdisc_ops, classid);
1093 if (cl->qdisc == NULL) 1093 if (cl->qdisc == NULL)
1094 cl->qdisc = &noop_qdisc; 1094 cl->qdisc = &noop_qdisc;
@@ -1209,8 +1209,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1209 if (cl->level > 0) 1209 if (cl->level > 0)
1210 return -EINVAL; 1210 return -EINVAL;
1211 if (new == NULL) { 1211 if (new == NULL) {
1212 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1212 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1213 &pfifo_qdisc_ops,
1214 cl->cl_common.classid); 1213 cl->cl_common.classid);
1215 if (new == NULL) 1214 if (new == NULL)
1216 new = &noop_qdisc; 1215 new = &noop_qdisc;
@@ -1452,8 +1451,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1452 q->root.cl_common.classid = sch->handle; 1451 q->root.cl_common.classid = sch->handle;
1453 q->root.refcnt = 1; 1452 q->root.refcnt = 1;
1454 q->root.sched = q; 1453 q->root.sched = q;
1455 q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1454 q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1456 &pfifo_qdisc_ops,
1457 sch->handle); 1455 sch->handle);
1458 if (q->root.qdisc == NULL) 1456 if (q->root.qdisc == NULL)
1459 q->root.qdisc = &noop_qdisc; 1457 q->root.qdisc = &noop_qdisc;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4be8d04b262d..01b519d6c52d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1121,8 +1121,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1121 if (cl->level) 1121 if (cl->level)
1122 return -EINVAL; 1122 return -EINVAL;
1123 if (new == NULL && 1123 if (new == NULL &&
1124 (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1124 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1125 &pfifo_qdisc_ops,
1126 cl->common.classid)) == NULL) 1125 cl->common.classid)) == NULL)
1127 return -ENOBUFS; 1126 return -ENOBUFS;
1128 1127
@@ -1247,8 +1246,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1247 return -EBUSY; 1246 return -EBUSY;
1248 1247
1249 if (!cl->level && htb_parent_last_child(cl)) { 1248 if (!cl->level && htb_parent_last_child(cl)) {
1250 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1249 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1251 &pfifo_qdisc_ops,
1252 cl->parent->common.classid); 1250 cl->parent->common.classid);
1253 last_child = 1; 1251 last_child = 1;
1254 } 1252 }
@@ -1302,14 +1300,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1302 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1300 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303 struct nlattr *opt = tca[TCA_OPTIONS]; 1301 struct nlattr *opt = tca[TCA_OPTIONS];
1304 struct qdisc_rate_table *rtab = NULL, *ctab = NULL; 1302 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1305 struct nlattr *tb[TCA_HTB_RTAB + 1]; 1303 struct nlattr *tb[__TCA_HTB_MAX];
1306 struct tc_htb_opt *hopt; 1304 struct tc_htb_opt *hopt;
1307 1305
1308 /* extract all subattrs from opt attr */ 1306 /* extract all subattrs from opt attr */
1309 if (!opt) 1307 if (!opt)
1310 goto failure; 1308 goto failure;
1311 1309
1312 err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy); 1310 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1313 if (err < 0) 1311 if (err < 0)
1314 goto failure; 1312 goto failure;
1315 1313
@@ -1377,7 +1375,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1377 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1375 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1378 so that can't be used inside of sch_tree_lock 1376 so that can't be used inside of sch_tree_lock
1379 -- thanks to Karlis Peisenieks */ 1377 -- thanks to Karlis Peisenieks */
1380 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1378 new_q = qdisc_create_dflt(sch->dev_queue,
1381 &pfifo_qdisc_ops, classid); 1379 &pfifo_qdisc_ops, classid);
1382 sch_tree_lock(sch); 1380 sch_tree_lock(sch);
1383 if (parent && !parent->level) { 1381 if (parent && !parent->level) {
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index fe91e50f9d98..ecc302f4d2a1 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -56,7 +56,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
56 56
57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
58 dev_queue = netdev_get_tx_queue(dev, ntx); 58 dev_queue = netdev_get_tx_queue(dev, ntx);
59 qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, 59 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
60 TC_H_MAKE(TC_H_MAJ(sch->handle), 60 TC_H_MAKE(TC_H_MAJ(sch->handle),
61 TC_H_MIN(ntx + 1))); 61 TC_H_MIN(ntx + 1)));
62 if (qdisc == NULL) 62 if (qdisc == NULL)
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 6ae251279fc2..32690deab5d0 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -227,8 +227,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
227 for (i = 0; i < q->bands; i++) { 227 for (i = 0; i < q->bands; i++) {
228 if (q->queues[i] == &noop_qdisc) { 228 if (q->queues[i] == &noop_qdisc) {
229 struct Qdisc *child, *old; 229 struct Qdisc *child, *old;
230 child = qdisc_create_dflt(qdisc_dev(sch), 230 child = qdisc_create_dflt(sch->dev_queue,
231 sch->dev_queue,
232 &pfifo_qdisc_ops, 231 &pfifo_qdisc_ops,
233 TC_H_MAKE(sch->handle, 232 TC_H_MAKE(sch->handle,
234 i + 1)); 233 i + 1));
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4714ff162bbd..e5593c083a78 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -538,8 +538,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
538 538
539 qdisc_watchdog_init(&q->watchdog, sch); 539 qdisc_watchdog_init(&q->watchdog, sch);
540 540
541 q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 541 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
542 &tfifo_qdisc_ops,
543 TC_H_MAKE(sch->handle, 1)); 542 TC_H_MAKE(sch->handle, 1));
544 if (!q->qdisc) { 543 if (!q->qdisc) {
545 pr_debug("netem: qdisc create failed\n"); 544 pr_debug("netem: qdisc create failed\n");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 0748fb1e3a49..b1c95bce33ce 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -200,7 +200,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
200 for (i=0; i<q->bands; i++) { 200 for (i=0; i<q->bands; i++) {
201 if (q->queues[i] == &noop_qdisc) { 201 if (q->queues[i] == &noop_qdisc) {
202 struct Qdisc *child, *old; 202 struct Qdisc *child, *old;
203 child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 203 child = qdisc_create_dflt(sch->dev_queue,
204 &pfifo_qdisc_ops, 204 &pfifo_qdisc_ops,
205 TC_H_MAKE(sch->handle, i + 1)); 205 TC_H_MAKE(sch->handle, i + 1));
206 if (child) { 206 if (child) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..3cf478d012dd 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -123,40 +123,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
123 case htons(ETH_P_IP): 123 case htons(ETH_P_IP):
124 { 124 {
125 const struct iphdr *iph; 125 const struct iphdr *iph;
126 int poff;
126 127
127 if (!pskb_network_may_pull(skb, sizeof(*iph))) 128 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err; 129 goto err;
129 iph = ip_hdr(skb); 130 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr; 131 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol; 132 h2 = (__force u32)iph->saddr ^ iph->protocol;
132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 133 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
133 (iph->protocol == IPPROTO_TCP || 134 break;
134 iph->protocol == IPPROTO_UDP || 135 poff = proto_ports_offset(iph->protocol);
135 iph->protocol == IPPROTO_UDPLITE || 136 if (poff >= 0 &&
136 iph->protocol == IPPROTO_SCTP || 137 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
137 iph->protocol == IPPROTO_DCCP || 138 iph = ip_hdr(skb);
138 iph->protocol == IPPROTO_ESP) && 139 h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 140 }
140 h2 ^= *(((u32*)iph) + iph->ihl);
141 break; 141 break;
142 } 142 }
143 case htons(ETH_P_IPV6): 143 case htons(ETH_P_IPV6):
144 { 144 {
145 struct ipv6hdr *iph; 145 struct ipv6hdr *iph;
146 int poff;
146 147
147 if (!pskb_network_may_pull(skb, sizeof(*iph))) 148 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 goto err; 149 goto err;
149 iph = ipv6_hdr(skb); 150 iph = ipv6_hdr(skb);
150 h = (__force u32)iph->daddr.s6_addr32[3]; 151 h = (__force u32)iph->daddr.s6_addr32[3];
151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; 152 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
152 if ((iph->nexthdr == IPPROTO_TCP || 153 poff = proto_ports_offset(iph->nexthdr);
153 iph->nexthdr == IPPROTO_UDP || 154 if (poff >= 0 &&
154 iph->nexthdr == IPPROTO_UDPLITE || 155 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
155 iph->nexthdr == IPPROTO_SCTP || 156 iph = ipv6_hdr(skb);
156 iph->nexthdr == IPPROTO_DCCP || 157 h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
157 iph->nexthdr == IPPROTO_ESP) && 158 }
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
159 h2 ^= *(u32*)&iph[1];
160 break; 159 break;
161 } 160 }
162 default: 161 default:
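
Besides switching to proto_ports_offset(), both hunks re-derive the header pointer after the second pskb_network_may_pull(); that is deliberate, not redundant. A successful pull may go through pskb_expand_head() and relocate skb->head, leaving the earlier iph pointer dangling, which is why the new code reads, with the reasoning spelled out:

    	poff = proto_ports_offset(iph->protocol);
    	if (poff >= 0 &&
    	    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
    		/* the pull may have reallocated the header: re-load */
    		iph = ip_hdr(skb);
    		h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
    	}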
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index feaabc103ce6..401af9596709 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
241 } 241 }
242 if (neigh_event_send(n, skb_res) == 0) { 242 if (neigh_event_send(n, skb_res) == 0) {
243 int err; 243 int err;
244 char haddr[MAX_ADDR_LEN];
244 245
245 read_lock(&n->lock); 246 neigh_ha_snapshot(haddr, n, dev);
246 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 247 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
247 n->ha, NULL, skb->len); 248 NULL, skb->len);
248 read_unlock(&n->lock);
249 249
250 if (err < 0) { 250 if (err < 0) {
251 neigh_release(n); 251 neigh_release(n);
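
The teql change drops the read_lock/read_unlock pair around dev_hard_header() in favor of copying the hardware address to the stack with neigh_ha_snapshot(), so the neighbour lock is no longer held across header construction. A hedged reconstruction of that helper from the same patch series (seqlock-based, so readers retry instead of blocking updates):

    static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
    				     const struct net_device *dev)
    {
    	unsigned int seq;

    	do {
    		seq = read_seqbegin(&n->ha_lock);
    		memcpy(dst, n->ha, dev->addr_len);
    	} while (read_seqretry(&n->ha_lock, seq));
    }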
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0b85e5256434..5f1fb8bd862d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/fcntl.h> 54#include <linux/fcntl.h>
53#include <linux/poll.h> 55#include <linux/poll.h>
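
This pr_fmt() define, repeated at the top of each SCTP file below, is what lets the later hunks drop the hand-written "SCTP: "/"sctp " prefixes: the pr_* helpers expand through pr_fmt(), so every message picks up the module name automatically. Roughly (sketch of the <linux/kernel.h> machinery):

    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
    #define pr_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)

    /* pr_err("Failed bind hash alloc\n")
     *   expands to printk(KERN_ERR "sctp" ": " "Failed bind hash alloc\n"),
     * which is why pr_fmt() must be defined before the kernel headers are
     * included -- exactly where each hunk places it. */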
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 476caaf100ed..6c8556459a75 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -37,6 +37,8 @@
37 * be incorporated into the next SCTP release. 37 * be incorporated into the next SCTP release.
38 */ 38 */
39 39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
40#include <linux/types.h> 42#include <linux/types.h>
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <linux/net.h> 44#include <linux/net.h>
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index ccb6dc48d15b..397296fb156f 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -43,6 +43,8 @@
43 * be incorporated into the next SCTP release. 43 * be incorporated into the next SCTP release.
44 */ 44 */
45 45
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47
46#include <net/sctp/sctp.h> 48#include <net/sctp/sctp.h>
47#include <net/sctp/sm.h> 49#include <net/sctp/sm.h>
48#include <linux/interrupt.h> 50#include <linux/interrupt.h>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 732689140fb8..95e0c8eda1a0 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/module.h> 52#include <linux/module.h>
51#include <linux/errno.h> 53#include <linux/errno.h>
52#include <linux/types.h> 54#include <linux/types.h>
@@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
336 memcpy(saddr, baddr, sizeof(union sctp_addr)); 338 memcpy(saddr, baddr, sizeof(union sctp_addr));
337 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); 339 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr);
338 } else { 340 } else {
339 printk(KERN_ERR "%s: asoc:%p Could not find a valid source " 341 pr_err("%s: asoc:%p Could not find a valid source "
340 "address for the dest:%pI6\n", 342 "address for the dest:%pI6\n",
341 __func__, asoc, &daddr->v6.sin6_addr); 343 __func__, asoc, &daddr->v6.sin6_addr);
342 } 344 }
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index f73ec0ea93ba..8ef8e7d9eb61 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -38,6 +38,8 @@
38 * be incorporated into the next SCTP release. 38 * be incorporated into the next SCTP release.
39 */ 39 */
40 40
41#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <net/sctp/sctp.h> 44#include <net/sctp/sctp.h>
43 45
@@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void)
134 ent = proc_create("sctp_dbg_objcnt", 0, 136 ent = proc_create("sctp_dbg_objcnt", 0,
135 proc_net_sctp, &sctp_objcnt_ops); 137 proc_net_sctp, &sctp_objcnt_ops);
136 if (!ent) 138 if (!ent)
137 printk(KERN_WARNING 139 pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
138 "sctp_dbg_objcnt: Unable to create /proc entry.\n");
139} 140}
140 141
141/* Cleanup the objcount entry in the proc filesystem. */ 142/* Cleanup the objcount entry in the proc filesystem. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index bcc4590ccaf2..60600d337a3a 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -41,6 +41,8 @@
41 * be incorporated into the next SCTP release. 41 * be incorporated into the next SCTP release.
42 */ 42 */
43 43
44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
45
44#include <linux/types.h> 46#include <linux/types.h>
45#include <linux/kernel.h> 47#include <linux/kernel.h>
46#include <linux/wait.h> 48#include <linux/wait.h>
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c04b2eb59186..8c6d379b4bb6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/types.h> 51#include <linux/types.h>
50#include <linux/list.h> /* For struct list_head */ 52#include <linux/list.h> /* For struct list_head */
51#include <linux/socket.h> 53#include <linux/socket.h>
@@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1463 /* Display the end of the 1465 /* Display the end of the
1464 * current range. 1466 * current range.
1465 */ 1467 */
1466 SCTP_DEBUG_PRINTK("-%08x", 1468 SCTP_DEBUG_PRINTK_CONT("-%08x",
1467 dbg_last_ack_tsn); 1469 dbg_last_ack_tsn);
1468 } 1470 }
1469 1471
1470 /* Start a new range. */ 1472 /* Start a new range. */
1471 SCTP_DEBUG_PRINTK(",%08x", tsn); 1473 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1472 dbg_ack_tsn = tsn; 1474 dbg_ack_tsn = tsn;
1473 break; 1475 break;
1474 1476
1475 case 1: /* The last TSN was NOT ACKed. */ 1477 case 1: /* The last TSN was NOT ACKed. */
1476 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1478 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1477 /* Display the end of current range. */ 1479 /* Display the end of current range. */
1478 SCTP_DEBUG_PRINTK("-%08x", 1480 SCTP_DEBUG_PRINTK_CONT("-%08x",
1479 dbg_last_kept_tsn); 1481 dbg_last_kept_tsn);
1480 } 1482 }
1481 1483
1482 SCTP_DEBUG_PRINTK("\n"); 1484 SCTP_DEBUG_PRINTK_CONT("\n");
1483 1485
1484 /* FALL THROUGH... */ 1486 /* FALL THROUGH... */
1485 default: 1487 default:
@@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1526 break; 1528 break;
1527 1529
1528 if (dbg_last_kept_tsn != dbg_kept_tsn) 1530 if (dbg_last_kept_tsn != dbg_kept_tsn)
1529 SCTP_DEBUG_PRINTK("-%08x", 1531 SCTP_DEBUG_PRINTK_CONT("-%08x",
1530 dbg_last_kept_tsn); 1532 dbg_last_kept_tsn);
1531 1533
1532 SCTP_DEBUG_PRINTK(",%08x", tsn); 1534 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1533 dbg_kept_tsn = tsn; 1535 dbg_kept_tsn = tsn;
1534 break; 1536 break;
1535 1537
1536 case 0: 1538 case 0:
1537 if (dbg_last_ack_tsn != dbg_ack_tsn) 1539 if (dbg_last_ack_tsn != dbg_ack_tsn)
1538 SCTP_DEBUG_PRINTK("-%08x", 1540 SCTP_DEBUG_PRINTK_CONT("-%08x",
1539 dbg_last_ack_tsn); 1541 dbg_last_ack_tsn);
1540 SCTP_DEBUG_PRINTK("\n"); 1542 SCTP_DEBUG_PRINTK_CONT("\n");
1541 1543
1542 /* FALL THROUGH... */ 1544 /* FALL THROUGH... */
1543 default: 1545 default:
@@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1556 switch (dbg_prt_state) { 1558 switch (dbg_prt_state) {
1557 case 0: 1559 case 0:
1558 if (dbg_last_ack_tsn != dbg_ack_tsn) { 1560 if (dbg_last_ack_tsn != dbg_ack_tsn) {
1559 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); 1561 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn);
1560 } else { 1562 } else {
1561 SCTP_DEBUG_PRINTK("\n"); 1563 SCTP_DEBUG_PRINTK_CONT("\n");
1562 } 1564 }
1563 break; 1565 break;
1564 1566
1565 case 1: 1567 case 1:
1566 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1568 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1567 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); 1569 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn);
1568 } else { 1570 } else {
1569 SCTP_DEBUG_PRINTK("\n"); 1571 SCTP_DEBUG_PRINTK_CONT("\n");
1570 } 1572 }
1571 } 1573 }
1572#endif /* SCTP_DEBUG */ 1574#endif /* SCTP_DEBUG */
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index db3a42b8b349..bc6cd75cc1dc 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -22,6 +22,8 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
25#include <linux/kernel.h> 27#include <linux/kernel.h>
26#include <linux/kprobes.h> 28#include <linux/kprobes.h>
27#include <linux/socket.h> 29#include <linux/socket.h>
@@ -117,6 +119,7 @@ static const struct file_operations sctpprobe_fops = {
117 .owner = THIS_MODULE, 119 .owner = THIS_MODULE,
118 .open = sctpprobe_open, 120 .open = sctpprobe_open,
119 .read = sctpprobe_read, 121 .read = sctpprobe_read,
122 .llseek = noop_llseek,
120}; 123};
121 124
122sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep, 125sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
@@ -192,7 +195,7 @@ static __init int sctpprobe_init(void)
192 if (ret) 195 if (ret)
193 goto remove_proc; 196 goto remove_proc;
194 197
195 pr_info("SCTP probe registered (port=%d)\n", port); 198 pr_info("probe registered (port=%d)\n", port);
196 199
197 return 0; 200 return 0;
198 201
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5027b83f1cc0..e58f9476f29c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/module.h> 51#include <linux/module.h>
50#include <linux/init.h> 52#include <linux/init.h>
51#include <linux/netdevice.h> 53#include <linux/netdevice.h>
@@ -90,7 +92,7 @@ static struct sctp_af *sctp_af_v6_specific;
90struct kmem_cache *sctp_chunk_cachep __read_mostly; 92struct kmem_cache *sctp_chunk_cachep __read_mostly;
91struct kmem_cache *sctp_bucket_cachep __read_mostly; 93struct kmem_cache *sctp_bucket_cachep __read_mostly;
92 94
93int sysctl_sctp_mem[3]; 95long sysctl_sctp_mem[3];
94int sysctl_sctp_rmem[3]; 96int sysctl_sctp_rmem[3];
95int sysctl_sctp_wmem[3]; 97int sysctl_sctp_wmem[3];
96 98
@@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void)
707 &init_net); 709 &init_net);
708 710
709 if (err < 0) { 711 if (err < 0) {
710 printk(KERN_ERR 712 pr_err("Failed to create the SCTP control socket\n");
711 "SCTP: Failed to create the SCTP control socket.\n");
712 return err; 713 return err;
713 } 714 }
714 return 0; 715 return 0;
@@ -798,7 +799,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
798static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp) 799static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp)
799{ 800{
800 /* PF_INET only supports AF_INET addresses. */ 801 /* PF_INET only supports AF_INET addresses. */
801 return (AF_INET == family); 802 return AF_INET == family;
802} 803}
803 804
804/* Address matching with wildcards allowed. */ 805/* Address matching with wildcards allowed. */
@@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void)
1206 __get_free_pages(GFP_ATOMIC, order); 1207 __get_free_pages(GFP_ATOMIC, order);
1207 } while (!sctp_assoc_hashtable && --order > 0); 1208 } while (!sctp_assoc_hashtable && --order > 0);
1208 if (!sctp_assoc_hashtable) { 1209 if (!sctp_assoc_hashtable) {
1209 printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); 1210 pr_err("Failed association hash alloc\n");
1210 status = -ENOMEM; 1211 status = -ENOMEM;
1211 goto err_ahash_alloc; 1212 goto err_ahash_alloc;
1212 } 1213 }
@@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void)
1220 sctp_ep_hashtable = (struct sctp_hashbucket *) 1221 sctp_ep_hashtable = (struct sctp_hashbucket *)
1221 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); 1222 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
1222 if (!sctp_ep_hashtable) { 1223 if (!sctp_ep_hashtable) {
1223 printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); 1224 pr_err("Failed endpoint_hash alloc\n");
1224 status = -ENOMEM; 1225 status = -ENOMEM;
1225 goto err_ehash_alloc; 1226 goto err_ehash_alloc;
1226 } 1227 }
@@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void)
1239 __get_free_pages(GFP_ATOMIC, order); 1240 __get_free_pages(GFP_ATOMIC, order);
1240 } while (!sctp_port_hashtable && --order > 0); 1241 } while (!sctp_port_hashtable && --order > 0);
1241 if (!sctp_port_hashtable) { 1242 if (!sctp_port_hashtable) {
1242 printk(KERN_ERR "SCTP: Failed bind hash alloc."); 1243 pr_err("Failed bind hash alloc\n");
1243 status = -ENOMEM; 1244 status = -ENOMEM;
1244 goto err_bhash_alloc; 1245 goto err_bhash_alloc;
1245 } 1246 }
@@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void)
1248 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); 1249 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
1249 } 1250 }
1250 1251
1251 printk(KERN_INFO "SCTP: Hash tables configured " 1252 pr_info("Hash tables configured (established %d bind %d)\n",
1252 "(established %d bind %d)\n",
1253 sctp_assoc_hashsize, sctp_port_hashsize); 1253 sctp_assoc_hashsize, sctp_port_hashsize);
1254 1254
1255 /* Disable ADDIP by default. */ 1255 /* Disable ADDIP by default. */
@@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void)
1290 1290
1291 /* Initialize the control inode/socket for handling OOTB packets. */ 1291 /* Initialize the control inode/socket for handling OOTB packets. */
1292 if ((status = sctp_ctl_sock_init())) { 1292 if ((status = sctp_ctl_sock_init())) {
1293 printk (KERN_ERR 1293 pr_err("Failed to initialize the SCTP control sock\n");
1294 "SCTP: Failed to initialize the SCTP control sock.\n");
1295 goto err_ctl_sock_init; 1294 goto err_ctl_sock_init;
1296 } 1295 }
1297 1296
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 246f92924658..2cc46f0962ca 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f5e5e27cac5e..b21b218d564f 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/skbuff.h> 52#include <linux/skbuff.h>
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/socket.h> 54#include <linux/socket.h>
@@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
1146 1148
1147 case SCTP_DISPOSITION_VIOLATION: 1149 case SCTP_DISPOSITION_VIOLATION:
1148 if (net_ratelimit()) 1150 if (net_ratelimit())
1149 printk(KERN_ERR "sctp protocol violation state %d " 1151 pr_err("protocol violation state %d chunkid %d\n",
1150 "chunkid %d\n", state, subtype.chunk); 1152 state, subtype.chunk);
1151 break; 1153 break;
1152 1154
1153 case SCTP_DISPOSITION_NOT_IMPL: 1155 case SCTP_DISPOSITION_NOT_IMPL:
1154 printk(KERN_WARNING "sctp unimplemented feature in state %d, " 1156 pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n",
1155 "event_type %d, event_id %d\n", 1157 state, event_type, subtype.chunk);
1156 state, event_type, subtype.chunk);
1157 break; 1158 break;
1158 1159
1159 case SCTP_DISPOSITION_BUG: 1160 case SCTP_DISPOSITION_BUG:
1160 printk(KERN_ERR "sctp bug in state %d, " 1161 pr_err("bug in state %d, event_type %d, event_id %d\n",
1161 "event_type %d, event_id %d\n",
1162 state, event_type, subtype.chunk); 1162 state, event_type, subtype.chunk);
1163 BUG(); 1163 BUG();
1164 break; 1164 break;
1165 1165
1166 default: 1166 default:
1167 printk(KERN_ERR "sctp impossible disposition %d " 1167 pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
1168 "in state %d, event_type %d, event_id %d\n",
1169 status, state, event_type, subtype.chunk); 1168 status, state, event_type, subtype.chunk);
1170 BUG(); 1169 BUG();
1171 break; 1170 break;
@@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1679 sctp_cmd_send_asconf(asoc); 1678 sctp_cmd_send_asconf(asoc);
1680 break; 1679 break;
1681 default: 1680 default:
1682 printk(KERN_WARNING "Impossible command: %u, %p\n", 1681 pr_warn("Impossible command: %u, %p\n",
1683 cmd->verb, cmd->obj.ptr); 1682 cmd->verb, cmd->obj.ptr);
1684 break; 1683 break;
1685 } 1684 }
1686 1685
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d344dc481ccc..4b4eb7c96bbd 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
@@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1138 if (unlikely(!link)) { 1140 if (unlikely(!link)) {
1139 if (from_addr.sa.sa_family == AF_INET6) { 1141 if (from_addr.sa.sa_family == AF_INET6) {
1140 if (net_ratelimit()) 1142 if (net_ratelimit())
1141 printk(KERN_WARNING 1143 pr_warn("%s association %p could not find address %pI6\n",
1142 "%s association %p could not find address %pI6\n", 1144 __func__,
1143 __func__, 1145 asoc,
1144 asoc, 1146 &from_addr.v6.sin6_addr);
1145 &from_addr.v6.sin6_addr);
1146 } else { 1147 } else {
1147 if (net_ratelimit()) 1148 if (net_ratelimit())
1148 printk(KERN_WARNING 1149 pr_warn("%s association %p could not find address %pI4\n",
1149 "%s association %p could not find address %pI4\n", 1150 __func__,
1150 __func__, 1151 asoc,
1151 asoc, 1152 &from_addr.v4.sin_addr.s_addr);
1152 &from_addr.v4.sin_addr.s_addr);
1153 } 1153 }
1154 return SCTP_DISPOSITION_DISCARD; 1154 return SCTP_DISPOSITION_DISCARD;
1155 } 1155 }
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 6d9b3aafcc5d..546d4387fb3c 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/skbuff.h> 51#include <linux/skbuff.h>
50#include <net/sctp/sctp.h> 52#include <net/sctp/sctp.h>
51#include <net/sctp/sm.h> 53#include <net/sctp/sm.h>
@@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = {
66 .name = "sctp_sf_bug" 68 .name = "sctp_sf_bug"
67}; 69};
68 70
69#define DO_LOOKUP(_max, _type, _table) \ 71#define DO_LOOKUP(_max, _type, _table) \
70 if ((event_subtype._type > (_max))) { \ 72({ \
71 printk(KERN_WARNING \ 73 const sctp_sm_table_entry_t *rtn; \
72 "sctp table %p possible attack:" \ 74 \
73 " event %d exceeds max %d\n", \ 75 if ((event_subtype._type > (_max))) { \
74 _table, event_subtype._type, _max); \ 76 pr_warn("table %p possible attack: event %d exceeds max %d\n", \
75 return &bug; \ 77 _table, event_subtype._type, _max); \
76 } \ 78 rtn = &bug; \
77 return &_table[event_subtype._type][(int)state]; 79 } else \
80 rtn = &_table[event_subtype._type][(int)state]; \
81 \
82 rtn; \
83})
78 84
79const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, 85const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
80 sctp_state_t state, 86 sctp_state_t state,
@@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
83 switch (event_type) { 89 switch (event_type) {
84 case SCTP_EVENT_T_CHUNK: 90 case SCTP_EVENT_T_CHUNK:
85 return sctp_chunk_event_lookup(event_subtype.chunk, state); 91 return sctp_chunk_event_lookup(event_subtype.chunk, state);
86 break;
87 case SCTP_EVENT_T_TIMEOUT: 92 case SCTP_EVENT_T_TIMEOUT:
88 DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, 93 return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
89 timeout_event_table); 94 timeout_event_table);
90 break;
91
92 case SCTP_EVENT_T_OTHER: 95 case SCTP_EVENT_T_OTHER:
93 DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); 96 return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other,
94 break; 97 other_event_table);
95
96 case SCTP_EVENT_T_PRIMITIVE: 98 case SCTP_EVENT_T_PRIMITIVE:
97 DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, 99 return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive,
98 primitive_event_table); 100 primitive_event_table);
99 break;
100
101 default: 101 default:
102 /* Yikes! We got an illegal event type. */ 102 /* Yikes! We got an illegal event type. */
103 return &bug; 103 return &bug;
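
[Editor's note] The rewritten DO_LOOKUP() above relies on GNU C statement expressions: a ({ ... }) block evaluates to its last expression, so the macro now yields the table entry and each switch case can simply return it, instead of the old form hiding a return statement inside the macro body. A minimal sketch of the idiom with hypothetical names:

	struct entry { const char *name; };
	static const struct entry bug_entry = { .name = "bug" };

	#define BOUNDED_ENTRY(_idx, _max, _table)			\
	({								\
		const struct entry *__e;				\
									\
		if ((_idx) > (_max))					\
			__e = &bug_entry; /* sentinel, like &bug above */ \
		else							\
			__e = &(_table)[(_idx)];			\
		__e;	/* the value of the whole ({ ... }) block */	\
	})

This also lets the now-unreachable break statements after each return be deleted, as the hunk above does.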
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fbb70770ad05..6bd554323a34 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,6 +57,8 @@
57 * be incorporated into the next SCTP release. 57 * be incorporated into the next SCTP release.
58 */ 58 */
59 59
60#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
61
60#include <linux/types.h> 62#include <linux/types.h>
61#include <linux/kernel.h> 63#include <linux/kernel.h>
62#include <linux/wait.h> 64#include <linux/wait.h>
@@ -109,12 +111,12 @@ static void sctp_sock_migrate(struct sock *, struct sock *,
109static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; 111static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
110 112
111extern struct kmem_cache *sctp_bucket_cachep; 113extern struct kmem_cache *sctp_bucket_cachep;
112extern int sysctl_sctp_mem[3]; 114extern long sysctl_sctp_mem[3];
113extern int sysctl_sctp_rmem[3]; 115extern int sysctl_sctp_rmem[3];
114extern int sysctl_sctp_wmem[3]; 116extern int sysctl_sctp_wmem[3];
115 117
116static int sctp_memory_pressure; 118static int sctp_memory_pressure;
117static atomic_t sctp_memory_allocated; 119static atomic_long_t sctp_memory_allocated;
118struct percpu_counter sctp_sockets_allocated; 120struct percpu_counter sctp_sockets_allocated;
119 121
120static void sctp_enter_memory_pressure(struct sock *sk) 122static void sctp_enter_memory_pressure(struct sock *sk)
@@ -2469,9 +2471,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
2469 if (params.sack_delay == 0 && params.sack_freq == 0) 2471 if (params.sack_delay == 0 && params.sack_freq == 0)
2470 return 0; 2472 return 0;
2471 } else if (optlen == sizeof(struct sctp_assoc_value)) { 2473 } else if (optlen == sizeof(struct sctp_assoc_value)) {
2472 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 2474 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
2473 "in delayed_ack socket option deprecated\n"); 2475 pr_warn("Use struct sctp_sack_info instead\n");
2474 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
2475 if (copy_from_user(&params, optval, optlen)) 2476 if (copy_from_user(&params, optval, optlen))
2476 return -EFAULT; 2477 return -EFAULT;
2477 2478
@@ -2879,10 +2880,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
2879 int val; 2880 int val;
2880 2881
2881 if (optlen == sizeof(int)) { 2882 if (optlen == sizeof(int)) {
2882 printk(KERN_WARNING 2883 pr_warn("Use of int in maxseg socket option deprecated\n");
2883 "SCTP: Use of int in maxseg socket option deprecated\n"); 2884 pr_warn("Use struct sctp_assoc_value instead\n");
2884 printk(KERN_WARNING
2885 "SCTP: Use struct sctp_assoc_value instead\n");
2886 if (copy_from_user(&val, optval, optlen)) 2885 if (copy_from_user(&val, optval, optlen))
2887 return -EFAULT; 2886 return -EFAULT;
2888 params.assoc_id = 0; 2887 params.assoc_id = 0;
@@ -3132,10 +3131,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
3132 int assoc_id = 0; 3131 int assoc_id = 0;
3133 3132
3134 if (optlen == sizeof(int)) { 3133 if (optlen == sizeof(int)) {
3135 printk(KERN_WARNING 3134 pr_warn("Use of int in max_burst socket option deprecated\n");
3136 "SCTP: Use of int in max_burst socket option deprecated\n"); 3135 pr_warn("Use struct sctp_assoc_value instead\n");
3137 printk(KERN_WARNING
3138 "SCTP: Use struct sctp_assoc_value instead\n");
3139 if (copy_from_user(&val, optval, optlen)) 3136 if (copy_from_user(&val, optval, optlen))
3140 return -EFAULT; 3137 return -EFAULT;
3141 } else if (optlen == sizeof(struct sctp_assoc_value)) { 3138 } else if (optlen == sizeof(struct sctp_assoc_value)) {
@@ -3606,7 +3603,40 @@ out:
3606/* The SCTP ioctl handler. */ 3603/* The SCTP ioctl handler. */
3607SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) 3604SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
3608{ 3605{
3609 return -ENOIOCTLCMD; 3606 int rc = -ENOTCONN;
3607
3608 sctp_lock_sock(sk);
3609
3610 /*
3611 * SEQPACKET-style sockets in LISTENING state are valid, for
3612 * SCTP, so only discard TCP-style sockets in LISTENING state.
3613 */
3614 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
3615 goto out;
3616
3617 switch (cmd) {
3618 case SIOCINQ: {
3619 struct sk_buff *skb;
3620 unsigned int amount = 0;
3621
3622 skb = skb_peek(&sk->sk_receive_queue);
3623 if (skb != NULL) {
3624 /*
3625 * We will only return the amount of this packet since
3626 * that is all that will be read.
3627 */
3628 amount = skb->len;
3629 }
3630 rc = put_user(amount, (int __user *)arg);
3631 break;
3632 }
3633 default:
3634 rc = -ENOIOCTLCMD;
3635 break;
3636 }
3637out:
3638 sctp_release_sock(sk);
3639 return rc;
3610} 3640}
3611 3641
3612/* This is the function which gets called during socket creation to 3642/* This is the function which gets called during socket creation to
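
[Editor's note] With the handler above, SIOCINQ on an SCTP socket reports the length of the next queued message instead of failing with -ENOIOCTLCMD. A hedged userspace sketch of the new behavior (error handling trimmed):

	#include <sys/ioctl.h>
	#include <linux/sockios.h>	/* SIOCINQ */

	static int next_message_len(int sctp_fd)
	{
		int avail = 0;

		/* Only the first queued message is counted, matching
		 * what a single recv() on the socket would return. */
		if (ioctl(sctp_fd, SIOCINQ, &avail) < 0)
			return -1;
		return avail;
	}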
@@ -3865,7 +3895,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
3865 } 3895 }
3866 3896
3867out: 3897out:
3868 return (retval); 3898 return retval;
3869} 3899}
3870 3900
3871 3901
@@ -3921,7 +3951,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
3921 } 3951 }
3922 3952
3923out: 3953out:
3924 return (retval); 3954 return retval;
3925} 3955}
3926 3956
3927/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS) 3957/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS)
@@ -4292,9 +4322,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len,
4292 if (copy_from_user(&params, optval, len)) 4322 if (copy_from_user(&params, optval, len))
4293 return -EFAULT; 4323 return -EFAULT;
4294 } else if (len == sizeof(struct sctp_assoc_value)) { 4324 } else if (len == sizeof(struct sctp_assoc_value)) {
4295 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 4325 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
4296 "in delayed_ack socket option deprecated\n"); 4326 pr_warn("Use struct sctp_sack_info instead\n");
4297 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
4298 if (copy_from_user(&params, optval, len)) 4327 if (copy_from_user(&params, optval, len))
4299 return -EFAULT; 4328 return -EFAULT;
4300 } else 4329 } else
@@ -4940,10 +4969,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
4940 struct sctp_association *asoc; 4969 struct sctp_association *asoc;
4941 4970
4942 if (len == sizeof(int)) { 4971 if (len == sizeof(int)) {
4943 printk(KERN_WARNING 4972 pr_warn("Use of int in maxseg socket option deprecated\n");
4944 "SCTP: Use of int in maxseg socket option deprecated\n"); 4973 pr_warn("Use struct sctp_assoc_value instead\n");
4945 printk(KERN_WARNING
4946 "SCTP: Use struct sctp_assoc_value instead\n");
4947 params.assoc_id = 0; 4974 params.assoc_id = 0;
4948 } else if (len >= sizeof(struct sctp_assoc_value)) { 4975 } else if (len >= sizeof(struct sctp_assoc_value)) {
4949 len = sizeof(struct sctp_assoc_value); 4976 len = sizeof(struct sctp_assoc_value);
@@ -5034,10 +5061,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
5034 struct sctp_association *asoc; 5061 struct sctp_association *asoc;
5035 5062
5036 if (len == sizeof(int)) { 5063 if (len == sizeof(int)) {
5037 printk(KERN_WARNING 5064 pr_warn("Use of int in max_burst socket option deprecated\n");
5038 "SCTP: Use of int in max_burst socket option deprecated\n"); 5065 pr_warn("Use struct sctp_assoc_value instead\n");
5039 printk(KERN_WARNING
5040 "SCTP: Use struct sctp_assoc_value instead\n");
5041 params.assoc_id = 0; 5066 params.assoc_id = 0;
5042 } else if (len >= sizeof(struct sctp_assoc_value)) { 5067 } else if (len >= sizeof(struct sctp_assoc_value)) {
5043 len = sizeof(struct sctp_assoc_value); 5068 len = sizeof(struct sctp_assoc_value);
@@ -5580,7 +5605,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum)
5580 /* Note: sk->sk_num gets filled in if ephemeral port request. */ 5605 /* Note: sk->sk_num gets filled in if ephemeral port request. */
5581 ret = sctp_get_port_local(sk, &addr); 5606 ret = sctp_get_port_local(sk, &addr);
5582 5607
5583 return (ret ? 1 : 0); 5608 return ret ? 1 : 0;
5584} 5609}
5585 5610
5586/* 5611/*
@@ -5597,8 +5622,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
5597 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); 5622 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
5598 if (IS_ERR(tfm)) { 5623 if (IS_ERR(tfm)) {
5599 if (net_ratelimit()) { 5624 if (net_ratelimit()) {
5600 printk(KERN_INFO 5625 pr_info("failed to load transform for %s: %ld\n",
5601 "SCTP: failed to load transform for %s: %ld\n",
5602 sctp_hmac_alg, PTR_ERR(tfm)); 5626 sctp_hmac_alg, PTR_ERR(tfm));
5603 } 5627 }
5604 return -ENOSYS; 5628 return -ENOSYS;
@@ -5727,13 +5751,12 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
5727 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 5751 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
5728 mask |= POLLERR; 5752 mask |= POLLERR;
5729 if (sk->sk_shutdown & RCV_SHUTDOWN) 5753 if (sk->sk_shutdown & RCV_SHUTDOWN)
5730 mask |= POLLRDHUP; 5754 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
5731 if (sk->sk_shutdown == SHUTDOWN_MASK) 5755 if (sk->sk_shutdown == SHUTDOWN_MASK)
5732 mask |= POLLHUP; 5756 mask |= POLLHUP;
5733 5757
5734 /* Is it readable? Reconsider this code with TCP-style support. */ 5758 /* Is it readable? Reconsider this code with TCP-style support. */
5735 if (!skb_queue_empty(&sk->sk_receive_queue) || 5759 if (!skb_queue_empty(&sk->sk_receive_queue))
5736 (sk->sk_shutdown & RCV_SHUTDOWN))
5737 mask |= POLLIN | POLLRDNORM; 5760 mask |= POLLIN | POLLRDNORM;
5738 5761
5739 /* The association is either gone or not ready. */ 5762 /* The association is either gone or not ready. */
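
[Editor's note] The sctp_poll() change folds RCV_SHUTDOWN into the POLLIN | POLLRDNORM mask directly, so a reader blocked in poll() wakes on a half-close even if it never asked for POLLRDHUP, and then sees EOF from recv(). A sketch of the userspace-visible effect, assuming a connected one-to-one SCTP socket:

	#include <poll.h>

	static int wait_for_data_or_eof(int fd)
	{
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		if (poll(&pfd, 1, -1) < 0)
			return -1;
		/* After the peer shuts down, POLLIN is now set as well,
		 * and a following recv() returns 0 to signal EOF. */
		return pfd.revents;
	}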
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 832590bbe0c0..50cb57f0919e 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -54,7 +54,7 @@ static int sack_timer_max = 500;
54static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ 54static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
55static int rwnd_scale_max = 16; 55static int rwnd_scale_max = 16;
56 56
57extern int sysctl_sctp_mem[3]; 57extern long sysctl_sctp_mem[3];
58extern int sysctl_sctp_rmem[3]; 58extern int sysctl_sctp_rmem[3];
59extern int sysctl_sctp_wmem[3]; 59extern int sysctl_sctp_wmem[3];
60 60
@@ -203,7 +203,7 @@ static ctl_table sctp_table[] = {
203 .data = &sysctl_sctp_mem, 203 .data = &sysctl_sctp_mem,
204 .maxlen = sizeof(sysctl_sctp_mem), 204 .maxlen = sizeof(sysctl_sctp_mem),
205 .mode = 0644, 205 .mode = 0644,
206 .proc_handler = proc_dointvec, 206 .proc_handler = proc_doulongvec_minmax
207 }, 207 },
208 { 208 {
209 .procname = "sctp_rmem", 209 .procname = "sctp_rmem",
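
[Editor's note] sysctl_sctp_mem becomes a long[3] to match the widened socket memory accounting, so its handler must step through the array in long-sized strides; on 64-bit, proc_dointvec over a long[] would read and write the wrong halves of each element. The entry shape, with hypothetical names:

	static long example_mem[3];

	static ctl_table example_table[] = {
		{
			.procname	= "example_mem",
			.data		= &example_mem,
			.maxlen		= sizeof(example_mem),
			.mode		= 0644,
			.proc_handler	= proc_doulongvec_minmax,
		},
		{ }
	};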
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 132046cb82fc..d3ae493d234a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/slab.h> 53#include <linux/slab.h>
52#include <linux/types.h> 54#include <linux/types.h>
53#include <linux/random.h> 55#include <linux/random.h>
@@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
244 struct dst_entry *dst; 246 struct dst_entry *dst;
245 247
246 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { 248 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
247 printk(KERN_WARNING "%s: Reported pmtu %d too low, " 249 pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
248 "using default minimum of %d\n", 250 __func__, pmtu,
249 __func__, pmtu, 251 SCTP_DEFAULT_MINSEGMENT);
250 SCTP_DEFAULT_MINSEGMENT);
251 /* Use default minimum segment size and disable 252 /* Use default minimum segment size and disable
252 * pmtu discovery on this transport. 253 * pmtu discovery on this transport.
253 */ 254 */
diff --git a/net/socket.c b/net/socket.c
index 2270b941bcc7..3ca2fd9e3720 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -209,8 +209,8 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
209 * specified. Zero is returned for a success. 209 * specified. Zero is returned for a success.
210 */ 210 */
211 211
212int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, 212static int move_addr_to_user(struct sockaddr *kaddr, int klen,
213 int __user *ulen) 213 void __user *uaddr, int __user *ulen)
214{ 214{
215 int err; 215 int err;
216 int len; 216 int len;
@@ -305,19 +305,17 @@ static const struct super_operations sockfs_ops = {
305 .statfs = simple_statfs, 305 .statfs = simple_statfs,
306}; 306};
307 307
308static int sockfs_get_sb(struct file_system_type *fs_type, 308static struct dentry *sockfs_mount(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *data, 309 int flags, const char *dev_name, void *data)
310 struct vfsmount *mnt)
311{ 310{
312 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 311 return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
313 mnt);
314} 312}
315 313
316static struct vfsmount *sock_mnt __read_mostly; 314static struct vfsmount *sock_mnt __read_mostly;
317 315
318static struct file_system_type sock_fs_type = { 316static struct file_system_type sock_fs_type = {
319 .name = "sockfs", 317 .name = "sockfs",
320 .get_sb = sockfs_get_sb, 318 .mount = sockfs_mount,
321 .kill_sb = kill_anon_super, 319 .kill_sb = kill_anon_super,
322}; 320};
323 321
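
[Editor's note] sockfs_get_sb() becomes sockfs_mount() as part of the tree-wide switch from ->get_sb() to ->mount(): the new callback returns the root dentry directly instead of filling in a caller-supplied vfsmount. A hedged sketch of the converted shape for a hypothetical pseudo filesystem (example_ops and EXAMPLE_MAGIC are placeholders):

	static struct dentry *example_mount(struct file_system_type *fs_type,
					    int flags, const char *dev_name,
					    void *data)
	{
		return mount_pseudo(fs_type, "example:", &example_ops,
				    EXAMPLE_MAGIC);
	}

	static struct file_system_type example_fs_type = {
		.name		= "examplefs",
		.mount		= example_mount,
		.kill_sb	= kill_anon_super,
	};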
@@ -377,7 +375,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
377 &socket_file_ops); 375 &socket_file_ops);
378 if (unlikely(!file)) { 376 if (unlikely(!file)) {
379 /* drop dentry, keep inode */ 377 /* drop dentry, keep inode */
380 atomic_inc(&path.dentry->d_inode->i_count); 378 ihold(path.dentry->d_inode);
381 path_put(&path); 379 path_put(&path);
382 put_unused_fd(fd); 380 put_unused_fd(fd);
383 return -ENFILE; 381 return -ENFILE;
@@ -480,6 +478,7 @@ static struct socket *sock_alloc(void)
480 sock = SOCKET_I(inode); 478 sock = SOCKET_I(inode);
481 479
482 kmemcheck_annotate_bitfield(sock, type); 480 kmemcheck_annotate_bitfield(sock, type);
481 inode->i_ino = get_next_ino();
483 inode->i_mode = S_IFSOCK | S_IRWXUGO; 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
484 inode->i_uid = current_fsuid(); 483 inode->i_uid = current_fsuid();
485 inode->i_gid = current_fsgid(); 484 inode->i_gid = current_fsgid();
@@ -502,6 +501,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
502const struct file_operations bad_sock_fops = { 501const struct file_operations bad_sock_fops = {
503 .owner = THIS_MODULE, 502 .owner = THIS_MODULE,
504 .open = sock_no_open, 503 .open = sock_no_open,
504 .llseek = noop_llseek,
505}; 505};
506 506
507/** 507/**
@@ -535,14 +535,13 @@ void sock_release(struct socket *sock)
535} 535}
536EXPORT_SYMBOL(sock_release); 536EXPORT_SYMBOL(sock_release);
537 537
538int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 538int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
539 union skb_shared_tx *shtx)
540{ 539{
541 shtx->flags = 0; 540 *tx_flags = 0;
542 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 541 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
543 shtx->hardware = 1; 542 *tx_flags |= SKBTX_HW_TSTAMP;
544 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 543 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
545 shtx->software = 1; 544 *tx_flags |= SKBTX_SW_TSTAMP;
546 return 0; 545 return 0;
547} 546}
548EXPORT_SYMBOL(sock_tx_timestamp); 547EXPORT_SYMBOL(sock_tx_timestamp);
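
[Editor's note] sock_tx_timestamp() now packs the timestamping requests into a single flags byte instead of a union of bitfields, so producers OR in SKBTX_* masks and consumers test them the same way. A one-line sketch of the consumer side (the helper name is hypothetical):

	#include <linux/types.h>

	static bool wants_hw_tstamp(__u8 tx_flags)
	{
		return tx_flags & SKBTX_HW_TSTAMP;
	}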
@@ -662,7 +661,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
662} 661}
663EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 662EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
664 663
665inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 664static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
665 struct sk_buff *skb)
666{ 666{
667 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 667 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
668 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 668 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
@@ -1144,7 +1144,7 @@ call_kill:
1144} 1144}
1145EXPORT_SYMBOL(sock_wake_async); 1145EXPORT_SYMBOL(sock_wake_async);
1146 1146
1147static int __sock_create(struct net *net, int family, int type, int protocol, 1147int __sock_create(struct net *net, int family, int type, int protocol,
1148 struct socket **res, int kern) 1148 struct socket **res, int kern)
1149{ 1149{
1150 int err; 1150 int err;
@@ -1256,6 +1256,7 @@ out_release:
1256 rcu_read_unlock(); 1256 rcu_read_unlock();
1257 goto out_sock_release; 1257 goto out_sock_release;
1258} 1258}
1259EXPORT_SYMBOL(__sock_create);
1259 1260
1260int sock_create(int family, int type, int protocol, struct socket **res) 1261int sock_create(int family, int type, int protocol, struct socket **res)
1261{ 1262{
@@ -1651,6 +1652,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1651 struct iovec iov; 1652 struct iovec iov;
1652 int fput_needed; 1653 int fput_needed;
1653 1654
1655 if (len > INT_MAX)
1656 len = INT_MAX;
1654 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1657 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1655 if (!sock) 1658 if (!sock)
1656 goto out; 1659 goto out;
@@ -1708,6 +1711,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1708 int err, err2; 1711 int err, err2;
1709 int fput_needed; 1712 int fput_needed;
1710 1713
1714 if (size > INT_MAX)
1715 size = INT_MAX;
1711 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1716 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1712 if (!sock) 1717 if (!sock)
1713 goto out; 1718 goto out;
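
[Editor's note] Both sendto() and recvfrom() now clamp the user-supplied size_t to INT_MAX before it reaches layers that carry lengths in an int; without the clamp, a huge size_t truncates to a negative int. The guard reduces to:

	static inline int clamp_user_len(size_t len)
	{
		return len > INT_MAX ? INT_MAX : (int)len;
	}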
@@ -1919,7 +1924,8 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1919 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1924 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1920 * checking falls down on this. 1925 * checking falls down on this.
1921 */ 1926 */
1922 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1927 if (copy_from_user(ctl_buf,
1928 (void __user __force *)msg_sys.msg_control,
1923 ctl_len)) 1929 ctl_len))
1924 goto out_freectl; 1930 goto out_freectl;
1925 msg_sys.msg_control = ctl_buf; 1931 msg_sys.msg_control = ctl_buf;
@@ -3054,14 +3060,19 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3054 char *optval, int *optlen) 3060 char *optval, int *optlen)
3055{ 3061{
3056 mm_segment_t oldfs = get_fs(); 3062 mm_segment_t oldfs = get_fs();
3063 char __user *uoptval;
3064 int __user *uoptlen;
3057 int err; 3065 int err;
3058 3066
3067 uoptval = (char __user __force *) optval;
3068 uoptlen = (int __user __force *) optlen;
3069
3059 set_fs(KERNEL_DS); 3070 set_fs(KERNEL_DS);
3060 if (level == SOL_SOCKET) 3071 if (level == SOL_SOCKET)
3061 err = sock_getsockopt(sock, level, optname, optval, optlen); 3072 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3062 else 3073 else
3063 err = sock->ops->getsockopt(sock, level, optname, optval, 3074 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3064 optlen); 3075 uoptlen);
3065 set_fs(oldfs); 3076 set_fs(oldfs);
3066 return err; 3077 return err;
3067} 3078}
@@ -3071,13 +3082,16 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3071 char *optval, unsigned int optlen) 3082 char *optval, unsigned int optlen)
3072{ 3083{
3073 mm_segment_t oldfs = get_fs(); 3084 mm_segment_t oldfs = get_fs();
3085 char __user *uoptval;
3074 int err; 3086 int err;
3075 3087
3088 uoptval = (char __user __force *) optval;
3089
3076 set_fs(KERNEL_DS); 3090 set_fs(KERNEL_DS);
3077 if (level == SOL_SOCKET) 3091 if (level == SOL_SOCKET)
3078 err = sock_setsockopt(sock, level, optname, optval, optlen); 3092 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3079 else 3093 else
3080 err = sock->ops->setsockopt(sock, level, optname, optval, 3094 err = sock->ops->setsockopt(sock, level, optname, uoptval,
3081 optlen); 3095 optlen);
3082 set_fs(oldfs); 3096 set_fs(oldfs);
3083 return err; 3097 return err;
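
[Editor's note] The kernel_getsockopt()/kernel_setsockopt() hunks introduce explicitly cast __user aliases for the kernel pointers: under set_fs(KERNEL_DS) the __user-typed socket APIs safely accept kernel addresses, and the __force marker tells sparse the address-space reclassification is deliberate rather than a bug. The pattern in isolation (the function name is hypothetical):

	static int example_kernel_opt(struct socket *sock, char *kval, int *klen)
	{
		char __user *uval = (char __user __force *)kval;
		int __user *ulen = (int __user __force *)klen;
		mm_segment_t oldfs = get_fs();
		int err;

		set_fs(KERNEL_DS);	/* uaccess now accepts kernel pointers */
		err = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF, uval, ulen);
		set_fs(oldfs);
		return err;
	}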
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 3376d7657185..8873fd8ddacd 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -36,22 +36,3 @@ config RPCSEC_GSS_KRB5
36 Kerberos support should be installed. 36 Kerberos support should be installed.
37 37
38 If unsure, say Y. 38 If unsure, say Y.
39
40config RPCSEC_GSS_SPKM3
41 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
42 depends on SUNRPC && EXPERIMENTAL
43 select SUNRPC_GSS
44 select CRYPTO
45 select CRYPTO_MD5
46 select CRYPTO_DES
47 select CRYPTO_CAST5
48 select CRYPTO_CBC
49 help
50 Choose Y here to enable Secure RPC using the SPKM3 public key
51 GSS-API mechanism (RFC 2025).
52
53 Secure RPC calls with SPKM3 require an auxiliary userspace
54 daemon which may be found in the Linux nfs-utils package
55 available from http://linux-nfs.org/.
56
57 If unsure, say N.
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index e9eaaf7d43c1..afe67849269f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -595,7 +595,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
595int 595int
596rpcauth_refreshcred(struct rpc_task *task) 596rpcauth_refreshcred(struct rpc_task *task)
597{ 597{
598 struct rpc_cred *cred = task->tk_rqstp->rq_cred; 598 struct rpc_cred *cred;
599 int err; 599 int err;
600 600
601 cred = task->tk_rqstp->rq_cred; 601 cred = task->tk_rqstp->rq_cred;
@@ -658,7 +658,7 @@ out1:
658 return err; 658 return err;
659} 659}
660 660
661void __exit rpcauth_remove_module(void) 661void rpcauth_remove_module(void)
662{ 662{
663 rpc_destroy_authunix(); 663 rpc_destroy_authunix();
664 rpc_destroy_generic_auth(); 664 rpc_destroy_generic_auth();
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 43162bb3b78f..e010a015d996 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -158,7 +158,7 @@ int __init rpc_init_generic_auth(void)
158 return rpcauth_init_credcache(&generic_auth); 158 return rpcauth_init_credcache(&generic_auth);
159} 159}
160 160
161void __exit rpc_destroy_generic_auth(void) 161void rpc_destroy_generic_auth(void)
162{ 162{
163 rpcauth_destroy_credcache(&generic_auth); 163 rpcauth_destroy_credcache(&generic_auth);
164} 164}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 74a231735f67..7350d86a32ee 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -11,8 +11,3 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16
17rpcsec_gss_spkm3-objs := gss_spkm3_mech.o gss_spkm3_seal.o gss_spkm3_unseal.o \
18 gss_spkm3_token.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 12c485982814..3835ce35e224 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1050,7 +1050,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
1050out: 1050out:
1051 if (acred->machine_cred != gss_cred->gc_machine_cred) 1051 if (acred->machine_cred != gss_cred->gc_machine_cred)
1052 return 0; 1052 return 0;
1053 return (rc->cr_uid == acred->uid); 1053 return rc->cr_uid == acred->uid;
1054} 1054}
1055 1055
1056/* 1056/*
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index 310b78e99456..c586e92bcf76 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -76,19 +76,19 @@ static int
76der_length_size( int length) 76der_length_size( int length)
77{ 77{
78 if (length < (1<<7)) 78 if (length < (1<<7))
79 return(1); 79 return 1;
80 else if (length < (1<<8)) 80 else if (length < (1<<8))
81 return(2); 81 return 2;
82#if (SIZEOF_INT == 2) 82#if (SIZEOF_INT == 2)
83 else 83 else
84 return(3); 84 return 3;
85#else 85#else
86 else if (length < (1<<16)) 86 else if (length < (1<<16))
87 return(3); 87 return 3;
88 else if (length < (1<<24)) 88 else if (length < (1<<24))
89 return(4); 89 return 4;
90 else 90 else
91 return(5); 91 return 5;
92#endif 92#endif
93} 93}
94 94
@@ -121,14 +121,14 @@ der_read_length(unsigned char **buf, int *bufsize)
121 int ret; 121 int ret;
122 122
123 if (*bufsize < 1) 123 if (*bufsize < 1)
124 return(-1); 124 return -1;
125 sf = *(*buf)++; 125 sf = *(*buf)++;
126 (*bufsize)--; 126 (*bufsize)--;
127 if (sf & 0x80) { 127 if (sf & 0x80) {
128 if ((sf &= 0x7f) > ((*bufsize)-1)) 128 if ((sf &= 0x7f) > ((*bufsize)-1))
129 return(-1); 129 return -1;
130 if (sf > SIZEOF_INT) 130 if (sf > SIZEOF_INT)
131 return (-1); 131 return -1;
132 ret = 0; 132 ret = 0;
133 for (; sf; sf--) { 133 for (; sf; sf--) {
134 ret = (ret<<8) + (*(*buf)++); 134 ret = (ret<<8) + (*(*buf)++);
@@ -138,7 +138,7 @@ der_read_length(unsigned char **buf, int *bufsize)
138 ret = sf; 138 ret = sf;
139 } 139 }
140 140
141 return(ret); 141 return ret;
142} 142}
143 143
144/* returns the length of a token, given the mech oid and the body size */ 144/* returns the length of a token, given the mech oid and the body size */
@@ -148,7 +148,7 @@ g_token_size(struct xdr_netobj *mech, unsigned int body_size)
148{ 148{
149 /* set body_size to sequence contents size */ 149 /* set body_size to sequence contents size */
150 body_size += 2 + (int) mech->len; /* NEED overflow check */ 150 body_size += 2 + (int) mech->len; /* NEED overflow check */
151 return(1 + der_length_size(body_size) + body_size); 151 return 1 + der_length_size(body_size) + body_size;
152} 152}
153 153
154EXPORT_SYMBOL_GPL(g_token_size); 154EXPORT_SYMBOL_GPL(g_token_size);
@@ -186,27 +186,27 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
186 int ret = 0; 186 int ret = 0;
187 187
188 if ((toksize-=1) < 0) 188 if ((toksize-=1) < 0)
189 return(G_BAD_TOK_HEADER); 189 return G_BAD_TOK_HEADER;
190 if (*buf++ != 0x60) 190 if (*buf++ != 0x60)
191 return(G_BAD_TOK_HEADER); 191 return G_BAD_TOK_HEADER;
192 192
193 if ((seqsize = der_read_length(&buf, &toksize)) < 0) 193 if ((seqsize = der_read_length(&buf, &toksize)) < 0)
194 return(G_BAD_TOK_HEADER); 194 return G_BAD_TOK_HEADER;
195 195
196 if (seqsize != toksize) 196 if (seqsize != toksize)
197 return(G_BAD_TOK_HEADER); 197 return G_BAD_TOK_HEADER;
198 198
199 if ((toksize-=1) < 0) 199 if ((toksize-=1) < 0)
200 return(G_BAD_TOK_HEADER); 200 return G_BAD_TOK_HEADER;
201 if (*buf++ != 0x06) 201 if (*buf++ != 0x06)
202 return(G_BAD_TOK_HEADER); 202 return G_BAD_TOK_HEADER;
203 203
204 if ((toksize-=1) < 0) 204 if ((toksize-=1) < 0)
205 return(G_BAD_TOK_HEADER); 205 return G_BAD_TOK_HEADER;
206 toid.len = *buf++; 206 toid.len = *buf++;
207 207
208 if ((toksize-=toid.len) < 0) 208 if ((toksize-=toid.len) < 0)
209 return(G_BAD_TOK_HEADER); 209 return G_BAD_TOK_HEADER;
210 toid.data = buf; 210 toid.data = buf;
211 buf+=toid.len; 211 buf+=toid.len;
212 212
@@ -217,17 +217,17 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
217 to return G_BAD_TOK_HEADER if the token header is in fact bad */ 217 to return G_BAD_TOK_HEADER if the token header is in fact bad */
218 218
219 if ((toksize-=2) < 0) 219 if ((toksize-=2) < 0)
220 return(G_BAD_TOK_HEADER); 220 return G_BAD_TOK_HEADER;
221 221
222 if (ret) 222 if (ret)
223 return(ret); 223 return ret;
224 224
225 if (!ret) { 225 if (!ret) {
226 *buf_in = buf; 226 *buf_in = buf;
227 *body_size = toksize; 227 *body_size = toksize;
228 } 228 }
229 229
230 return(ret); 230 return ret;
231} 231}
232 232
233EXPORT_SYMBOL_GPL(g_verify_token_header); 233EXPORT_SYMBOL_GPL(g_verify_token_header);
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 778e5dfc5144..f375decc024b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -427,7 +427,7 @@ static int
427context_derive_keys_rc4(struct krb5_ctx *ctx) 427context_derive_keys_rc4(struct krb5_ctx *ctx)
428{ 428{
429 struct crypto_hash *hmac; 429 struct crypto_hash *hmac;
430 char sigkeyconstant[] = "signaturekey"; 430 static const char sigkeyconstant[] = "signaturekey";
431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ 431 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
432 struct hash_desc desc; 432 struct hash_desc desc;
433 struct scatterlist sg[1]; 433 struct scatterlist sg[1];
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 415c013ba382..62ac90c62cb1 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -162,5 +162,5 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
162 *seqnum = ((plain[0]) | 162 *seqnum = ((plain[0]) |
163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); 163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
164 164
165 return (0); 165 return 0;
166} 166}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 2689de39dc78..8b4061049d76 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -331,7 +331,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle)
331 *context_handle); 331 *context_handle);
332 332
333 if (!*context_handle) 333 if (!*context_handle)
334 return(GSS_S_NO_CONTEXT); 334 return GSS_S_NO_CONTEXT;
335 if ((*context_handle)->internal_ctx_id) 335 if ((*context_handle)->internal_ctx_id)
336 (*context_handle)->mech_type->gm_ops 336 (*context_handle)->mech_type->gm_ops
337 ->gss_delete_sec_context((*context_handle) 337 ->gss_delete_sec_context((*context_handle)
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
deleted file mode 100644
index adade3d313f2..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_mech.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 * J. Bruce Fields <bfields@umich.edu>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
24 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 */
36
37#include <linux/err.h>
38#include <linux/module.h>
39#include <linux/init.h>
40#include <linux/types.h>
41#include <linux/slab.h>
42#include <linux/sunrpc/auth.h>
43#include <linux/in.h>
44#include <linux/sunrpc/svcauth_gss.h>
45#include <linux/sunrpc/gss_spkm3.h>
46#include <linux/sunrpc/xdr.h>
47#include <linux/crypto.h>
48
49#ifdef RPC_DEBUG
50# define RPCDBG_FACILITY RPCDBG_AUTH
51#endif
52
53static const void *
54simple_get_bytes(const void *p, const void *end, void *res, int len)
55{
56 const void *q = (const void *)((const char *)p + len);
57 if (unlikely(q > end || q < p))
58 return ERR_PTR(-EFAULT);
59 memcpy(res, p, len);
60 return q;
61}
62
63static const void *
64simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
65{
66 const void *q;
67 unsigned int len;
68 p = simple_get_bytes(p, end, &len, sizeof(len));
69 if (IS_ERR(p))
70 return p;
71 res->len = len;
72 if (len == 0) {
73 res->data = NULL;
74 return p;
75 }
76 q = (const void *)((const char *)p + len);
77 if (unlikely(q > end || q < p))
78 return ERR_PTR(-EFAULT);
79 res->data = kmemdup(p, len, GFP_NOFS);
80 if (unlikely(res->data == NULL))
81 return ERR_PTR(-ENOMEM);
82 return q;
83}
84
85static int
86gss_import_sec_context_spkm3(const void *p, size_t len,
87 struct gss_ctx *ctx_id,
88 gfp_t gfp_mask)
89{
90 const void *end = (const void *)((const char *)p + len);
91 struct spkm3_ctx *ctx;
92 int version;
93
94 if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
95 goto out_err;
96
97 p = simple_get_bytes(p, end, &version, sizeof(version));
98 if (IS_ERR(p))
99 goto out_err_free_ctx;
100 if (version != 1) {
101 dprintk("RPC: unknown spkm3 token format: "
102 "obsolete nfs-utils?\n");
103 p = ERR_PTR(-EINVAL);
104 goto out_err_free_ctx;
105 }
106
107 p = simple_get_netobj(p, end, &ctx->ctx_id);
108 if (IS_ERR(p))
109 goto out_err_free_ctx;
110
111 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
112 if (IS_ERR(p))
113 goto out_err_free_ctx_id;
114
115 p = simple_get_netobj(p, end, &ctx->mech_used);
116 if (IS_ERR(p))
117 goto out_err_free_ctx_id;
118
119 p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
120 if (IS_ERR(p))
121 goto out_err_free_mech;
122
123 p = simple_get_netobj(p, end, &ctx->conf_alg);
124 if (IS_ERR(p))
125 goto out_err_free_mech;
126
127 p = simple_get_netobj(p, end, &ctx->derived_conf_key);
128 if (IS_ERR(p))
129 goto out_err_free_conf_alg;
130
131 p = simple_get_netobj(p, end, &ctx->intg_alg);
132 if (IS_ERR(p))
133 goto out_err_free_conf_key;
134
135 p = simple_get_netobj(p, end, &ctx->derived_integ_key);
136 if (IS_ERR(p))
137 goto out_err_free_intg_alg;
138
139 if (p != end) {
140 p = ERR_PTR(-EFAULT);
141 goto out_err_free_intg_key;
142 }
143
144 ctx_id->internal_ctx_id = ctx;
145
146 dprintk("RPC: Successfully imported new spkm context.\n");
147 return 0;
148
149out_err_free_intg_key:
150 kfree(ctx->derived_integ_key.data);
151out_err_free_intg_alg:
152 kfree(ctx->intg_alg.data);
153out_err_free_conf_key:
154 kfree(ctx->derived_conf_key.data);
155out_err_free_conf_alg:
156 kfree(ctx->conf_alg.data);
157out_err_free_mech:
158 kfree(ctx->mech_used.data);
159out_err_free_ctx_id:
160 kfree(ctx->ctx_id.data);
161out_err_free_ctx:
162 kfree(ctx);
163out_err:
164 return PTR_ERR(p);
165}
166
167static void
168gss_delete_sec_context_spkm3(void *internal_ctx)
169{
170 struct spkm3_ctx *sctx = internal_ctx;
171
172 kfree(sctx->derived_integ_key.data);
173 kfree(sctx->intg_alg.data);
174 kfree(sctx->derived_conf_key.data);
175 kfree(sctx->conf_alg.data);
176 kfree(sctx->mech_used.data);
177 kfree(sctx->ctx_id.data);
178 kfree(sctx);
179}
180
181static u32
182gss_verify_mic_spkm3(struct gss_ctx *ctx,
183 struct xdr_buf *signbuf,
184 struct xdr_netobj *checksum)
185{
186 u32 maj_stat = 0;
187 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
188
189 maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
190
191 dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
192 return maj_stat;
193}
194
195static u32
196gss_get_mic_spkm3(struct gss_ctx *ctx,
197 struct xdr_buf *message_buffer,
198 struct xdr_netobj *message_token)
199{
200 u32 err = 0;
201 struct spkm3_ctx *sctx = ctx->internal_ctx_id;
202
203 err = spkm3_make_token(sctx, message_buffer,
204 message_token, SPKM_MIC_TOK);
205 dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
206 return err;
207}
208
209static const struct gss_api_ops gss_spkm3_ops = {
210 .gss_import_sec_context = gss_import_sec_context_spkm3,
211 .gss_get_mic = gss_get_mic_spkm3,
212 .gss_verify_mic = gss_verify_mic_spkm3,
213 .gss_delete_sec_context = gss_delete_sec_context_spkm3,
214};
215
216static struct pf_desc gss_spkm3_pfs[] = {
217 {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"},
218 {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"},
219};
220
221static struct gss_api_mech gss_spkm3_mech = {
222 .gm_name = "spkm3",
223 .gm_owner = THIS_MODULE,
224 .gm_oid = {7, "\053\006\001\005\005\001\003"},
225 .gm_ops = &gss_spkm3_ops,
226 .gm_pf_num = ARRAY_SIZE(gss_spkm3_pfs),
227 .gm_pfs = gss_spkm3_pfs,
228};
229
230static int __init init_spkm3_module(void)
231{
232 int status;
233
234 status = gss_mech_register(&gss_spkm3_mech);
235 if (status)
236 printk("Failed to register spkm3 gss mechanism!\n");
237 return status;
238}
239
240static void __exit cleanup_spkm3_module(void)
241{
242 gss_mech_unregister(&gss_spkm3_mech);
243}
244
245MODULE_LICENSE("GPL");
246module_init(init_spkm3_module);
247module_exit(cleanup_spkm3_module);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
deleted file mode 100644
index 5a3a65a0e2b4..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ /dev/null
@@ -1,186 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_seal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/jiffies.h>
38#include <linux/sunrpc/gss_spkm3.h>
39#include <linux/random.h>
40#include <linux/crypto.h>
41#include <linux/pagemap.h>
42#include <linux/scatterlist.h>
43#include <linux/sunrpc/xdr.h>
44
45#ifdef RPC_DEBUG
46# define RPCDBG_FACILITY RPCDBG_AUTH
47#endif
48
49const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
50const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
51
52/*
53 * spkm3_make_token()
54 *
55 * Only SPKM_MIC_TOK with md5 intg-alg is supported
56 */
57
58u32
59spkm3_make_token(struct spkm3_ctx *ctx,
60 struct xdr_buf * text, struct xdr_netobj * token,
61 int toktype)
62{
63 s32 checksum_type;
64 char tokhdrbuf[25];
65 char cksumdata[16];
66 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
67 struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf};
68 int tokenlen = 0;
69 unsigned char *ptr;
70 s32 now;
71 int ctxelen = 0, ctxzbit = 0;
72 int md5elen = 0, md5zbit = 0;
73
74 now = jiffies;
75
76 if (ctx->ctx_id.len != 16) {
77 dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
78 ctx->ctx_id.len);
79 goto out_err;
80 }
81
82 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
83 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
84 "algorithm. only support hmac-md5 I-ALG.\n");
85 goto out_err;
86 } else
87 checksum_type = CKSUMTYPE_HMAC_MD5;
88
89 if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
90 dprintk("RPC: gss_spkm3_seal: unsupported C-ALG "
91 "algorithm\n");
92 goto out_err;
93 }
94
95 if (toktype == SPKM_MIC_TOK) {
96 /* Calculate checksum over the mic-header */
97 asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
98 spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
99 ctxelen, ctxzbit);
100 if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
101 (char *)mic_hdr.data, mic_hdr.len,
102 text, 0, &md5cksum))
103 goto out_err;
104
105 asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
106 tokenlen = 10 + ctxelen + 1 + md5elen + 1;
107
108 /* Create token header using generic routines */
109 token->len = g_token_size(&ctx->mech_used, tokenlen + 2);
110
111 ptr = token->data;
112 g_make_token_header(&ctx->mech_used, tokenlen + 2, &ptr);
113
114 spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
115 } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
116 dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK "
117 "not supported\n");
118 goto out_err;
119 }
120
121 /* XXX need to implement sequence numbers, and ctx->expired */
122
123 return GSS_S_COMPLETE;
124out_err:
125 token->data = NULL;
126 token->len = 0;
127 return GSS_S_FAILURE;
128}
129
130static int
131spkm3_checksummer(struct scatterlist *sg, void *data)
132{
133 struct hash_desc *desc = data;
134
135 return crypto_hash_update(desc, sg, sg->length);
136}
137
138/* checksum the plaintext data and hdrlen bytes of the token header */
139s32
140make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
141 unsigned int hdrlen, struct xdr_buf *body,
142 unsigned int body_offset, struct xdr_netobj *cksum)
143{
144 char *cksumname;
145 struct hash_desc desc; /* XXX add to ctx? */
146 struct scatterlist sg[1];
147 int err;
148
149 switch (cksumtype) {
150 case CKSUMTYPE_HMAC_MD5:
151 cksumname = "hmac(md5)";
152 break;
153 default:
154 dprintk("RPC: spkm3_make_checksum:"
155 " unsupported checksum %d", cksumtype);
156 return GSS_S_FAILURE;
157 }
158
159 if (key->data == NULL || key->len <= 0) return GSS_S_FAILURE;
160
161 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
162 if (IS_ERR(desc.tfm))
163 return GSS_S_FAILURE;
164 cksum->len = crypto_hash_digestsize(desc.tfm);
165 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
166
167 err = crypto_hash_setkey(desc.tfm, key->data, key->len);
168 if (err)
169 goto out;
170
171 err = crypto_hash_init(&desc);
172 if (err)
173 goto out;
174
175 sg_init_one(sg, header, hdrlen);
176 crypto_hash_update(&desc, sg, sg->length);
177
178 xdr_process_buf(body, body_offset, body->len - body_offset,
179 spkm3_checksummer, &desc);
180 crypto_hash_final(&desc, cksum->data);
181
182out:
183 crypto_free_hash(desc.tfm);
184
185 return err ? GSS_S_FAILURE : 0;
186}
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
deleted file mode 100644
index a99825d7caa0..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ /dev/null
@@ -1,267 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_token.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/random.h>
41#include <linux/crypto.h>
42
43#ifdef RPC_DEBUG
44# define RPCDBG_FACILITY RPCDBG_AUTH
45#endif
46
47/*
48 * asn1_bitstring_len()
49 *
50 * calculate the asn1 bitstring length of the xdr_netobject
51 */
52void
53asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits)
54{
55 int i, zbit = 0,elen = in->len;
56 char *ptr;
57
58 ptr = &in->data[in->len -1];
59
60 /* count trailing 0's */
61 for(i = in->len; i > 0; i--) {
62 if (*ptr == 0) {
63 ptr--;
64 elen--;
65 } else
66 break;
67 }
68
69 /* count number of 0 bits in final octet */
70 ptr = &in->data[elen - 1];
71 for(i = 0; i < 8; i++) {
72 short mask = 0x01;
73
74 if (!((mask << i) & *ptr))
75 zbit++;
76 else
77 break;
78 }
79 *enclen = elen;
80 *zerobits = zbit;
81}
82
83/*
84 * decode_asn1_bitstring()
85 *
86 * decode a bitstring into a buffer of the expected length.
87 * enclen = bit string length
88 * explen = expected length (define in rfc)
89 */
90int
91decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen)
92{
93 if (!(out->data = kzalloc(explen,GFP_NOFS)))
94 return 0;
95 out->len = explen;
96 memcpy(out->data, in, enclen);
97 return 1;
98}
99
100/*
101 * SPKMInnerContextToken choice SPKM_MIC asn1 token layout
102 *
103 * contextid is always 16 bytes plain data. max asn1 bitstring len = 17.
104 *
105 * tokenlen = pos[0] to end of token (max pos[45] with MD5 cksum)
106 *
107 * pos value
108 * ----------
109 * [0] a4 SPKM-MIC tag
110 * [1] ?? innertoken length (max 44)
111 *
112 *
113 * tok_hdr piece of checksum data starts here
114 *
115 * the maximum mic-header len = 9 + 17 = 26
116 * mic-header
117 * ----------
118 * [2] 30 SEQUENCE tag
119 * [3] ?? mic-header length: (max 23) = TokenID + ContextID
120 *
121 * TokenID - all fields constant and can be hardcoded
122 * -------
123 * [4] 02 Type 2
124 * [5] 02 Length 2
125 * [6][7] 01 01 TokenID (SPKM_MIC_TOK)
126 *
127 * ContextID - encoded length not constant, calculated
128 * ---------
129 * [8] 03 Type 3
130 * [9] ?? encoded length
131 * [10] ?? ctxzbit
132 * [11] contextid
133 *
134 * mic_header piece of checksum data ends here.
135 *
136 * int-cksum - encoded length not constant, calculated
137 * ---------
138 * [??] 03 Type 3
139 * [??] ?? encoded length
140 * [??] ?? md5zbit
141 * [??] int-cksum (NID_md5 = 16)
142 *
143 * maximum SPKM-MIC innercontext token length =
144 * 10 + encoded contextid_size(17 max) + 2 + encoded
145 * cksum_size (17 maxfor NID_md5) = 46
146 */
147
148/*
149 * spkm3_mic_header()
150 *
151 * Prepare the SPKM_MIC_TOK mic-header for check-sum calculation
152 * elen: 16 byte context id asn1 bitstring encoded length
153 */
154void
155spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ctxdata, int elen, int zbit)
156{
157 char *hptr = *hdrbuf;
158 char *top = *hdrbuf;
159
160 *(u8 *)hptr++ = 0x30;
161 *(u8 *)hptr++ = elen + 7; /* on the wire header length */
162
163 /* tokenid */
164 *(u8 *)hptr++ = 0x02;
165 *(u8 *)hptr++ = 0x02;
166 *(u8 *)hptr++ = 0x01;
167 *(u8 *)hptr++ = 0x01;
168
169	/* contextid */
170 *(u8 *)hptr++ = 0x03;
171 *(u8 *)hptr++ = elen + 1; /* add 1 to include zbit */
172 *(u8 *)hptr++ = zbit;
173 memcpy(hptr, ctxdata, elen);
174 hptr += elen;
175 *hdrlen = hptr - top;
176}
177
178/*
179 * spkm3_mic_innercontext_token()
180 *
181 * *tokp points to the beginning of the SPKM_MIC token described
182 * in rfc 2025, section 3.2.1:
183 *
184 * toklen is the inner token length
185 */
186void
187spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
188{
189 unsigned char *ict = *tokp;
190
191 *(u8 *)ict++ = 0xa4;
192 *(u8 *)ict++ = toklen;
193 memcpy(ict, mic_hdr->data, mic_hdr->len);
194 ict += mic_hdr->len;
195
196 *(u8 *)ict++ = 0x03;
197 *(u8 *)ict++ = md5elen + 1; /* add 1 to include zbit */
198 *(u8 *)ict++ = md5zbit;
199 memcpy(ict, md5cksum->data, md5elen);
200}
201
202u32
203spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **cksum)
204{
205 struct xdr_netobj spkm3_ctx_id = {.len =0, .data = NULL};
206 unsigned char *ptr = *tokp;
207 int ctxelen;
208 u32 ret = GSS_S_DEFECTIVE_TOKEN;
209
210 /* spkm3 innercontext token preamble */
211 if ((ptr[0] != 0xa4) || (ptr[2] != 0x30)) {
212 dprintk("RPC: BAD SPKM ictoken preamble\n");
213 goto out;
214 }
215
216 *mic_hdrlen = ptr[3];
217
218 /* token type */
219 if ((ptr[4] != 0x02) || (ptr[5] != 0x02)) {
220 dprintk("RPC: BAD asn1 SPKM3 token type\n");
221 goto out;
222 }
223
224 /* only support SPKM_MIC_TOK */
225 if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
226 dprintk("RPC: ERROR unsupported SPKM3 token\n");
227 goto out;
228 }
229
230 /* contextid */
231 if (ptr[8] != 0x03) {
232 dprintk("RPC: BAD SPKM3 asn1 context-id type\n");
233 goto out;
234 }
235
236 ctxelen = ptr[9];
237 if (ctxelen > 17) { /* length includes asn1 zbit octet */
238 dprintk("RPC: BAD SPKM3 contextid len %d\n", ctxelen);
239 goto out;
240 }
241
242 /* ignore ptr[10] */
243
244 if (!decode_asn1_bitstring(&spkm3_ctx_id, &ptr[11], ctxelen - 1, 16))
245 goto out;
246
247 /*
248 * in the current implementation: the optional int-alg is not present,
249 * so the default int-alg (md5) is used; the optional snd-seq field is
250 * also not present
251 */
252
253 if (*mic_hdrlen != 6 + ctxelen) {
254 dprintk("RPC: BAD SPKM_ MIC_TOK header len %d: we only "
255 "support default int-alg (should be absent) "
256 "and do not support snd-seq\n", *mic_hdrlen);
257 goto out;
258 }
259 /* checksum */
260 *cksum = (&ptr[10] + ctxelen); /* ctxelen includes ptr[10] */
261
262 ret = GSS_S_COMPLETE;
263out:
264 kfree(spkm3_ctx_id.data);
265 return ret;
266}
267
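The layout comment above maps byte-for-byte onto spkm3_mic_header() and spkm3_make_mic_token(). As a sanity check, here is a minimal user-space sketch (illustrative only, not kernel code) that builds the same mic-header bytes for a 16-byte context id with zbit 0 and prints them:

#include <stdio.h>
#include <string.h>

/* User-space sketch of the mic-header layout documented above;
 * mirrors spkm3_mic_header() but is an assumption for illustration. */
static unsigned int build_mic_header(unsigned char *buf,
                                     const unsigned char *ctx,
                                     unsigned int elen, unsigned char zbit)
{
        unsigned char *p = buf;

        *p++ = 0x30;            /* SEQUENCE tag */
        *p++ = elen + 7;        /* on-the-wire mic-header length */
        *p++ = 0x02;            /* TokenID: type 2 */
        *p++ = 0x02;            /* TokenID: length 2 */
        *p++ = 0x01;            /* TokenID value 01 01 ... */
        *p++ = 0x01;            /* ... i.e. SPKM_MIC_TOK */
        *p++ = 0x03;            /* ContextID: type 3 (bitstring) */
        *p++ = elen + 1;        /* encoded length, +1 for the zbit octet */
        *p++ = zbit;
        memcpy(p, ctx, elen);
        return 9 + elen;        /* max 9 + 17 = 26, matching the comment */
}

int main(void)
{
        unsigned char ctx[16] = { 0xaa };       /* dummy context id */
        unsigned char hdr[32];
        unsigned int i, n = build_mic_header(hdr, ctx, sizeof(ctx), 0);

        for (i = 0; i < n; i++)
                printf("%02x ", hdr[i]);
        printf(" (len=%u)\n", n);
        return 0;
}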
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
deleted file mode 100644
index cc21ee860bb6..000000000000
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/*
2 * linux/net/sunrpc/gss_spkm3_unseal.c
3 *
4 * Copyright (c) 2003 The Regents of the University of Michigan.
5 * All rights reserved.
6 *
7 * Andy Adamson <andros@umich.edu>
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 */
35
36#include <linux/types.h>
37#include <linux/slab.h>
38#include <linux/jiffies.h>
39#include <linux/sunrpc/gss_spkm3.h>
40#include <linux/crypto.h>
41
42#ifdef RPC_DEBUG
43# define RPCDBG_FACILITY RPCDBG_AUTH
44#endif
45
46/*
47 * spkm3_read_token()
48 *
49 * only SPKM_MIC_TOK with md5 intg-alg is supported
50 */
51u32
52spkm3_read_token(struct spkm3_ctx *ctx,
53 struct xdr_netobj *read_token, /* checksum */
54 struct xdr_buf *message_buffer, /* signbuf */
55 int toktype)
56{
57 s32 checksum_type;
58 s32 code;
59 struct xdr_netobj wire_cksum = { .len = 0, .data = NULL };
60 char cksumdata[16];
61 struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
62 unsigned char *ptr = (unsigned char *)read_token->data;
63 unsigned char *cksum;
64 int bodysize, md5elen;
65 int mic_hdrlen;
66 u32 ret = GSS_S_DEFECTIVE_TOKEN;
67
68 if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
69 &bodysize, &ptr, read_token->len))
70 goto out;
71
72 /* decode the token */
73
74 if (toktype != SPKM_MIC_TOK) {
75 dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
76 goto out;
77 }
78
79 if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
80 goto out;
81
82 if (*cksum++ != 0x03) {
83 dprintk("RPC: spkm3_read_token BAD checksum type\n");
84 goto out;
85 }
86 md5elen = *cksum++;
87 cksum++; /* move past the zbit */
88
89 if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
90 goto out;
91
92 /* HARD CODED FOR MD5 */
93
94 /* compute the checksum of the message.
95 * ptr + 2 = start of header piece of checksum
96 * mic_hdrlen + 2 = length of header piece of checksum
97 */
98 ret = GSS_S_DEFECTIVE_TOKEN;
99 if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
100 dprintk("RPC: gss_spkm3_seal: unsupported I-ALG "
101 "algorithm\n");
102 goto out;
103 }
104
105 checksum_type = CKSUMTYPE_HMAC_MD5;
106
107 code = make_spkm3_checksum(checksum_type,
108 &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
109 message_buffer, 0, &md5cksum);
110
111 if (code)
112 goto out;
113
114 ret = GSS_S_BAD_SIG;
115 code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
116 if (code) {
117 dprintk("RPC: bad MIC checksum\n");
118 goto out;
119 }
120
121
122 /* XXX: need to add expiration and sequencing */
123 ret = GSS_S_COMPLETE;
124out:
125 kfree(wire_cksum.data);
126 return ret;
127}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index cc385b3a59c2..dec2a6fc7c12 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -964,7 +964,7 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
964 if (rqstp->rq_gssclient == NULL) 964 if (rqstp->rq_gssclient == NULL)
965 return SVC_DENIED; 965 return SVC_DENIED;
966 stat = svcauth_unix_set_client(rqstp); 966 stat = svcauth_unix_set_client(rqstp);
967 if (stat == SVC_DROP) 967 if (stat == SVC_DROP || stat == SVC_CLOSE)
968 return stat; 968 return stat;
969 return SVC_OK; 969 return SVC_OK;
970} 970}
@@ -1018,7 +1018,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1018 return SVC_DENIED; 1018 return SVC_DENIED;
1019 memset(&rsikey, 0, sizeof(rsikey)); 1019 memset(&rsikey, 0, sizeof(rsikey));
1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) 1020 if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
1021 return SVC_DROP; 1021 return SVC_CLOSE;
1022 *authp = rpc_autherr_badverf; 1022 *authp = rpc_autherr_badverf;
1023 if (svc_safe_getnetobj(argv, &tmpobj)) { 1023 if (svc_safe_getnetobj(argv, &tmpobj)) {
1024 kfree(rsikey.in_handle.data); 1024 kfree(rsikey.in_handle.data);
@@ -1026,38 +1026,35 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
1026 } 1026 }
1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) { 1027 if (dup_netobj(&rsikey.in_token, &tmpobj)) {
1028 kfree(rsikey.in_handle.data); 1028 kfree(rsikey.in_handle.data);
1029 return SVC_DROP; 1029 return SVC_CLOSE;
1030 } 1030 }
1031 1031
1032 /* Perform upcall, or find upcall result: */ 1032 /* Perform upcall, or find upcall result: */
1033 rsip = rsi_lookup(&rsikey); 1033 rsip = rsi_lookup(&rsikey);
1034 rsi_free(&rsikey); 1034 rsi_free(&rsikey);
1035 if (!rsip) 1035 if (!rsip)
1036 return SVC_DROP; 1036 return SVC_CLOSE;
1037 switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { 1037 if (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0)
1038 case -EAGAIN:
1039 case -ETIMEDOUT:
1040 case -ENOENT:
1041 /* No upcall result: */ 1038 /* No upcall result: */
1042 return SVC_DROP; 1039 return SVC_CLOSE;
1043 case 0: 1040
1044 ret = SVC_DROP; 1041 ret = SVC_CLOSE;
1045 /* Got an answer to the upcall; use it: */ 1042 /* Got an answer to the upcall; use it: */
1046 if (gss_write_init_verf(rqstp, rsip)) 1043 if (gss_write_init_verf(rqstp, rsip))
1047 goto out; 1044 goto out;
1048 if (resv->iov_len + 4 > PAGE_SIZE) 1045 if (resv->iov_len + 4 > PAGE_SIZE)
1049 goto out; 1046 goto out;
1050 svc_putnl(resv, RPC_SUCCESS); 1047 svc_putnl(resv, RPC_SUCCESS);
1051 if (svc_safe_putnetobj(resv, &rsip->out_handle)) 1048 if (svc_safe_putnetobj(resv, &rsip->out_handle))
1052 goto out; 1049 goto out;
1053 if (resv->iov_len + 3 * 4 > PAGE_SIZE) 1050 if (resv->iov_len + 3 * 4 > PAGE_SIZE)
1054 goto out; 1051 goto out;
1055 svc_putnl(resv, rsip->major_status); 1052 svc_putnl(resv, rsip->major_status);
1056 svc_putnl(resv, rsip->minor_status); 1053 svc_putnl(resv, rsip->minor_status);
1057 svc_putnl(resv, GSS_SEQ_WIN); 1054 svc_putnl(resv, GSS_SEQ_WIN);
1058 if (svc_safe_putnetobj(resv, &rsip->out_token)) 1055 if (svc_safe_putnetobj(resv, &rsip->out_token))
1059 goto out; 1056 goto out;
1060 } 1057
1061 ret = SVC_COMPLETE; 1058 ret = SVC_COMPLETE;
1062out: 1059out:
1063 cache_put(&rsip->h, &rsi_cache); 1060 cache_put(&rsip->h, &rsi_cache);
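For context: SVC_CLOSE (the handler is added to svc_process_common() in the net/sunrpc/svc.c hunk later in this patch) behaves exactly like SVC_DROP, except that a temporary, per-connection transport is closed first, so a client whose request died because the server's cache state is unrecoverable reconnects instead of waiting forever. The control flow, as a simplified sketch:

        switch (auth_res) {
        case SVC_CLOSE:
                if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
                        svc_close_xprt(rqstp->rq_xprt);
                /* fall through: close implies drop */
        case SVC_DROP:
                goto dropit;
        }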
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2b06410e584e..e433e7580e27 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -28,21 +28,21 @@
28#include <linux/workqueue.h> 28#include <linux/workqueue.h>
29#include <linux/mutex.h> 29#include <linux/mutex.h>
30#include <linux/pagemap.h> 30#include <linux/pagemap.h>
31#include <linux/smp_lock.h>
32#include <asm/ioctls.h> 31#include <asm/ioctls.h>
33#include <linux/sunrpc/types.h> 32#include <linux/sunrpc/types.h>
34#include <linux/sunrpc/cache.h> 33#include <linux/sunrpc/cache.h>
35#include <linux/sunrpc/stats.h> 34#include <linux/sunrpc/stats.h>
36#include <linux/sunrpc/rpc_pipe_fs.h> 35#include <linux/sunrpc/rpc_pipe_fs.h>
36#include "netns.h"
37 37
38#define RPCDBG_FACILITY RPCDBG_CACHE 38#define RPCDBG_FACILITY RPCDBG_CACHE
39 39
40static int cache_defer_req(struct cache_req *req, struct cache_head *item); 40static void cache_defer_req(struct cache_req *req, struct cache_head *item);
41static void cache_revisit_request(struct cache_head *item); 41static void cache_revisit_request(struct cache_head *item);
42 42
43static void cache_init(struct cache_head *h) 43static void cache_init(struct cache_head *h)
44{ 44{
45 time_t now = get_seconds(); 45 time_t now = seconds_since_boot();
46 h->next = NULL; 46 h->next = NULL;
47 h->flags = 0; 47 h->flags = 0;
48 kref_init(&h->ref); 48 kref_init(&h->ref);
@@ -52,7 +52,7 @@ static void cache_init(struct cache_head *h)
52 52
53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) 53static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
54{ 54{
55 return (h->expiry_time < get_seconds()) || 55 return (h->expiry_time < seconds_since_boot()) ||
56 (detail->flush_time > h->last_refresh); 56 (detail->flush_time > h->last_refresh);
57} 57}
58 58
@@ -127,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
127static void cache_fresh_locked(struct cache_head *head, time_t expiry) 127static void cache_fresh_locked(struct cache_head *head, time_t expiry)
128{ 128{
129 head->expiry_time = expiry; 129 head->expiry_time = expiry;
130 head->last_refresh = get_seconds(); 130 head->last_refresh = seconds_since_boot();
131 set_bit(CACHE_VALID, &head->flags); 131 set_bit(CACHE_VALID, &head->flags);
132} 132}
133 133
@@ -238,7 +238,7 @@ int cache_check(struct cache_detail *detail,
238 238
239 /* now see if we want to start an upcall */ 239 /* now see if we want to start an upcall */
240 refresh_age = (h->expiry_time - h->last_refresh); 240 refresh_age = (h->expiry_time - h->last_refresh);
241 age = get_seconds() - h->last_refresh; 241 age = seconds_since_boot() - h->last_refresh;
242 242
243 if (rqstp == NULL) { 243 if (rqstp == NULL) {
244 if (rv == -EAGAIN) 244 if (rv == -EAGAIN)
@@ -253,7 +253,7 @@ int cache_check(struct cache_detail *detail,
253 cache_revisit_request(h); 253 cache_revisit_request(h);
254 if (rv == -EAGAIN) { 254 if (rv == -EAGAIN) {
255 set_bit(CACHE_NEGATIVE, &h->flags); 255 set_bit(CACHE_NEGATIVE, &h->flags);
256 cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY); 256 cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
257 cache_fresh_unlocked(h, detail); 257 cache_fresh_unlocked(h, detail);
258 rv = -ENOENT; 258 rv = -ENOENT;
259 } 259 }
@@ -268,7 +268,8 @@ int cache_check(struct cache_detail *detail,
268 } 268 }
269 269
270 if (rv == -EAGAIN) { 270 if (rv == -EAGAIN) {
271 if (cache_defer_req(rqstp, h) < 0) { 271 cache_defer_req(rqstp, h);
272 if (!test_bit(CACHE_PENDING, &h->flags)) {
272 /* Request is not deferred */ 273 /* Request is not deferred */
273 rv = cache_is_valid(detail, h); 274 rv = cache_is_valid(detail, h);
274 if (rv == -EAGAIN) 275 if (rv == -EAGAIN)
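The get_seconds() to seconds_since_boot() conversion running through this file moves cache expiry onto a boot-relative clock, so a settimeofday() jump can no longer mass-expire or resurrect cache entries; convert_to_wallclock() is applied only at the userspace boundary (c_show() and read_flush() below). A sketch of the two helpers as this patch implies them (the real definitions live in the sunrpc headers, so treat the bodies as assumptions):

        static inline time_t seconds_since_boot(void)
        {
                struct timespec boot;
                getboottime(&boot);
                return get_seconds() - boot.tv_sec;
        }

        static inline time_t convert_to_wallclock(time_t sinceboot)
        {
                struct timespec boot;
                getboottime(&boot);
                return boot.tv_sec + sinceboot;
        }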
@@ -388,11 +389,11 @@ static int cache_clean(void)
388 return -1; 389 return -1;
389 } 390 }
390 current_detail = list_entry(next, struct cache_detail, others); 391 current_detail = list_entry(next, struct cache_detail, others);
391 if (current_detail->nextcheck > get_seconds()) 392 if (current_detail->nextcheck > seconds_since_boot())
392 current_index = current_detail->hash_size; 393 current_index = current_detail->hash_size;
393 else { 394 else {
394 current_index = 0; 395 current_index = 0;
395 current_detail->nextcheck = get_seconds()+30*60; 396 current_detail->nextcheck = seconds_since_boot()+30*60;
396 } 397 }
397 } 398 }
398 399
@@ -477,7 +478,7 @@ EXPORT_SYMBOL_GPL(cache_flush);
477void cache_purge(struct cache_detail *detail) 478void cache_purge(struct cache_detail *detail)
478{ 479{
479 detail->flush_time = LONG_MAX; 480 detail->flush_time = LONG_MAX;
480 detail->nextcheck = get_seconds(); 481 detail->nextcheck = seconds_since_boot();
481 cache_flush(); 482 cache_flush();
482 detail->flush_time = 1; 483 detail->flush_time = 1;
483} 484}
@@ -506,81 +507,155 @@ EXPORT_SYMBOL_GPL(cache_purge);
506 507
507static DEFINE_SPINLOCK(cache_defer_lock); 508static DEFINE_SPINLOCK(cache_defer_lock);
508static LIST_HEAD(cache_defer_list); 509static LIST_HEAD(cache_defer_list);
509static struct list_head cache_defer_hash[DFR_HASHSIZE]; 510static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
510static int cache_defer_cnt; 511static int cache_defer_cnt;
511 512
512static int cache_defer_req(struct cache_req *req, struct cache_head *item) 513static void __unhash_deferred_req(struct cache_deferred_req *dreq)
514{
515 hlist_del_init(&dreq->hash);
516 if (!list_empty(&dreq->recent)) {
517 list_del_init(&dreq->recent);
518 cache_defer_cnt--;
519 }
520}
521
522static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
513{ 523{
514 struct cache_deferred_req *dreq, *discard;
515 int hash = DFR_HASH(item); 524 int hash = DFR_HASH(item);
516 525
517 if (cache_defer_cnt >= DFR_MAX) { 526 INIT_LIST_HEAD(&dreq->recent);
518 /* too much in the cache, randomly drop this one, 527 hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
519 * or continue and drop the oldest below 528}
520 */ 529
521 if (net_random()&1) 530static void setup_deferral(struct cache_deferred_req *dreq,
522 return -ENOMEM; 531 struct cache_head *item,
523 } 532 int count_me)
524 dreq = req->defer(req); 533{
525 if (dreq == NULL)
526 return -ENOMEM;
527 534
528 dreq->item = item; 535 dreq->item = item;
529 536
530 spin_lock(&cache_defer_lock); 537 spin_lock(&cache_defer_lock);
531 538
532 list_add(&dreq->recent, &cache_defer_list); 539 __hash_deferred_req(dreq, item);
533 540
534 if (cache_defer_hash[hash].next == NULL) 541 if (count_me) {
535 INIT_LIST_HEAD(&cache_defer_hash[hash]); 542 cache_defer_cnt++;
536 list_add(&dreq->hash, &cache_defer_hash[hash]); 543 list_add(&dreq->recent, &cache_defer_list);
537
538 /* it is in, now maybe clean up */
539 discard = NULL;
540 if (++cache_defer_cnt > DFR_MAX) {
541 discard = list_entry(cache_defer_list.prev,
542 struct cache_deferred_req, recent);
543 list_del_init(&discard->recent);
544 list_del_init(&discard->hash);
545 cache_defer_cnt--;
546 } 544 }
545
547 spin_unlock(&cache_defer_lock); 546 spin_unlock(&cache_defer_lock);
548 547
548}
549
550struct thread_deferred_req {
551 struct cache_deferred_req handle;
552 struct completion completion;
553};
554
555static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
556{
557 struct thread_deferred_req *dr =
558 container_of(dreq, struct thread_deferred_req, handle);
559 complete(&dr->completion);
560}
561
562static void cache_wait_req(struct cache_req *req, struct cache_head *item)
563{
564 struct thread_deferred_req sleeper;
565 struct cache_deferred_req *dreq = &sleeper.handle;
566
567 sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
568 dreq->revisit = cache_restart_thread;
569
570 setup_deferral(dreq, item, 0);
571
572 if (!test_bit(CACHE_PENDING, &item->flags) ||
573 wait_for_completion_interruptible_timeout(
574 &sleeper.completion, req->thread_wait) <= 0) {
575 /* The completion wasn't completed, so we need
576 * to clean up
577 */
578 spin_lock(&cache_defer_lock);
579 if (!hlist_unhashed(&sleeper.handle.hash)) {
580 __unhash_deferred_req(&sleeper.handle);
581 spin_unlock(&cache_defer_lock);
582 } else {
583 /* cache_revisit_request already removed
584 * this from the hash table, but hasn't
585 * called ->revisit yet. It will very soon
586 * and we need to wait for it.
587 */
588 spin_unlock(&cache_defer_lock);
589 wait_for_completion(&sleeper.completion);
590 }
591 }
592}
593
594static void cache_limit_defers(void)
595{
596 /* Make sure we haven't exceeded the limit of allowed deferred
597 * requests.
598 */
599 struct cache_deferred_req *discard = NULL;
600
601 if (cache_defer_cnt <= DFR_MAX)
602 return;
603
604 spin_lock(&cache_defer_lock);
605
606 /* Consider removing either the first or the last */
607 if (cache_defer_cnt > DFR_MAX) {
608 if (net_random() & 1)
609 discard = list_entry(cache_defer_list.next,
610 struct cache_deferred_req, recent);
611 else
612 discard = list_entry(cache_defer_list.prev,
613 struct cache_deferred_req, recent);
614 __unhash_deferred_req(discard);
615 }
616 spin_unlock(&cache_defer_lock);
549 if (discard) 617 if (discard)
550 /* there was one too many */
551 discard->revisit(discard, 1); 618 discard->revisit(discard, 1);
619}
552 620
553 if (!test_bit(CACHE_PENDING, &item->flags)) { 621static void cache_defer_req(struct cache_req *req, struct cache_head *item)
554 /* must have just been validated... */ 622{
555 cache_revisit_request(item); 623 struct cache_deferred_req *dreq;
556 return -EAGAIN; 624
625 if (req->thread_wait) {
626 cache_wait_req(req, item);
627 if (!test_bit(CACHE_PENDING, &item->flags))
628 return;
557 } 629 }
558 return 0; 630 dreq = req->defer(req);
631 if (dreq == NULL)
632 return;
633 setup_deferral(dreq, item, 1);
634 if (!test_bit(CACHE_PENDING, &item->flags))
635 /* Bit could have been cleared before we managed to
636 * set up the deferral, so we need to revisit just in case
637 */
638 cache_revisit_request(item);
639
640 cache_limit_defers();
559} 641}
560 642
561static void cache_revisit_request(struct cache_head *item) 643static void cache_revisit_request(struct cache_head *item)
562{ 644{
563 struct cache_deferred_req *dreq; 645 struct cache_deferred_req *dreq;
564 struct list_head pending; 646 struct list_head pending;
565 647 struct hlist_node *lp, *tmp;
566 struct list_head *lp;
567 int hash = DFR_HASH(item); 648 int hash = DFR_HASH(item);
568 649
569 INIT_LIST_HEAD(&pending); 650 INIT_LIST_HEAD(&pending);
570 spin_lock(&cache_defer_lock); 651 spin_lock(&cache_defer_lock);
571 652
572 lp = cache_defer_hash[hash].next; 653 hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
573 if (lp) { 654 if (dreq->item == item) {
574 while (lp != &cache_defer_hash[hash]) { 655 __unhash_deferred_req(dreq);
575 dreq = list_entry(lp, struct cache_deferred_req, hash); 656 list_add(&dreq->recent, &pending);
576 lp = lp->next;
577 if (dreq->item == item) {
578 list_del_init(&dreq->hash);
579 list_move(&dreq->recent, &pending);
580 cache_defer_cnt--;
581 }
582 } 657 }
583 } 658
584 spin_unlock(&cache_defer_lock); 659 spin_unlock(&cache_defer_lock);
585 660
586 while (!list_empty(&pending)) { 661 while (!list_empty(&pending)) {
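The subtle part of cache_wait_req() above is the timeout race: if the wait times out just as cache_revisit_request() has unhashed the sleeper but not yet called ->revisit, returning immediately would let the waker complete() an on-stack completion whose frame is already gone. The hlist_unhashed() test is what disambiguates the two cases; in outline:

        spin_lock(&cache_defer_lock);
        if (!hlist_unhashed(&sleeper.handle.hash)) {
                /* still hashed: no one else holds a reference,
                 * so unhash it ourselves and return safely */
                __unhash_deferred_req(&sleeper.handle);
                spin_unlock(&cache_defer_lock);
        } else {
                /* revisit is in flight: wait for its complete(),
                 * otherwise the stack frame is freed under the waker */
                spin_unlock(&cache_defer_lock);
                wait_for_completion(&sleeper.completion);
        }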
@@ -601,9 +676,8 @@ void cache_clean_deferred(void *owner)
601 676
602 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { 677 list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
603 if (dreq->owner == owner) { 678 if (dreq->owner == owner) {
604 list_del_init(&dreq->hash); 679 __unhash_deferred_req(dreq);
605 list_move(&dreq->recent, &pending); 680 list_add(&dreq->recent, &pending);
606 cache_defer_cnt--;
607 } 681 }
608 } 682 }
609 spin_unlock(&cache_defer_lock); 683 spin_unlock(&cache_defer_lock);
@@ -902,7 +976,7 @@ static int cache_release(struct inode *inode, struct file *filp,
902 filp->private_data = NULL; 976 filp->private_data = NULL;
903 kfree(rp); 977 kfree(rp);
904 978
905 cd->last_close = get_seconds(); 979 cd->last_close = seconds_since_boot();
906 atomic_dec(&cd->readers); 980 atomic_dec(&cd->readers);
907 } 981 }
908 module_put(cd->owner); 982 module_put(cd->owner);
@@ -1015,6 +1089,23 @@ static void warn_no_listener(struct cache_detail *detail)
1015 } 1089 }
1016} 1090}
1017 1091
1092static bool cache_listeners_exist(struct cache_detail *detail)
1093{
1094 if (atomic_read(&detail->readers))
1095 return true;
1096 if (detail->last_close == 0)
1097 /* This cache was never opened */
1098 return false;
1099 if (detail->last_close < seconds_since_boot() - 30)
1100 /*
1101 * We allow for the possibility that someone might
1102 * restart a userspace daemon without restarting the
1103 * server; but after 30 seconds, we give up.
1104 */
1105 return false;
1106 return true;
1107}
1108
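A worked timeline for the 30-second grace period in cache_listeners_exist(), with times in seconds_since_boot() units:

        /*
         *  t=100  daemon closes the channel      -> last_close = 100
         *  t=120  upcall: 120 - 30 = 90,  100 >= 90  -> assume a daemon
         *                 restart is under way; queue the upcall
         *  t=140  upcall: 140 - 30 = 110, 100 < 110  -> give up; caller
         *                 sees cache_listeners_exist() return false
         */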
1018/* 1109/*
1019 * register an upcall request to user-space and queue it up for read() by the 1110 * register an upcall request to user-space and queue it up for read() by the
1020 * upcall daemon. 1111 * upcall daemon.
@@ -1033,10 +1124,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
1033 char *bp; 1124 char *bp;
1034 int len; 1125 int len;
1035 1126
1036 if (atomic_read(&detail->readers) == 0 && 1127 if (!cache_listeners_exist(detail)) {
1037 detail->last_close < get_seconds() - 30) { 1128 warn_no_listener(detail);
1038 warn_no_listener(detail); 1129 return -EINVAL;
1039 return -EINVAL;
1040 } 1130 }
1041 1131
1042 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1132 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
@@ -1095,13 +1185,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
1095 if (bp[0] == '\\' && bp[1] == 'x') { 1185 if (bp[0] == '\\' && bp[1] == 'x') {
1096 /* HEX STRING */ 1186 /* HEX STRING */
1097 bp += 2; 1187 bp += 2;
1098 while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) { 1188 while (len < bufsize) {
1099 int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1189 int h, l;
1100 bp++; 1190
1101 byte <<= 4; 1191 h = hex_to_bin(bp[0]);
1102 byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10; 1192 if (h < 0)
1103 *dest++ = byte; 1193 break;
1104 bp++; 1194
1195 l = hex_to_bin(bp[1]);
1196 if (l < 0)
1197 break;
1198
1199 *dest++ = (h << 4) | l;
1200 bp += 2;
1105 len++; 1201 len++;
1106 } 1202 }
1107 } else { 1203 } else {
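hex_to_bin() (lib/hexdump.c) returns the numeric value of a hex digit, or -1 for any other character, which is what lets the rewritten loop drop the isxdigit() pre-checks and stop cleanly on the first bad digit. Roughly equivalent semantics, as a user-space sketch:

        #include <ctype.h>

        /* Sketch of the hex_to_bin() semantics assumed by the loop above. */
        static int hex_to_bin_sketch(char ch)
        {
                if (ch >= '0' && ch <= '9')
                        return ch - '0';
                ch = tolower(ch);
                if (ch >= 'a' && ch <= 'f')
                        return ch - 'a' + 10;
                return -1;
        }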
@@ -1219,7 +1315,8 @@ static int c_show(struct seq_file *m, void *p)
1219 1315
1220 ifdebug(CACHE) 1316 ifdebug(CACHE)
1221 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n", 1317 seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
1222 cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags); 1318 convert_to_wallclock(cp->expiry_time),
1319 atomic_read(&cp->ref.refcount), cp->flags);
1223 cache_get(cp); 1320 cache_get(cp);
1224 if (cache_check(cd, cp, NULL)) 1321 if (cache_check(cd, cp, NULL))
1225 /* cache_check does a cache_put on failure */ 1322 /* cache_check does a cache_put on failure */
@@ -1285,7 +1382,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
1285 unsigned long p = *ppos; 1382 unsigned long p = *ppos;
1286 size_t len; 1383 size_t len;
1287 1384
1288 sprintf(tbuf, "%lu\n", cd->flush_time); 1385 sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
1289 len = strlen(tbuf); 1386 len = strlen(tbuf);
1290 if (p >= len) 1387 if (p >= len)
1291 return 0; 1388 return 0;
@@ -1303,19 +1400,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1303 struct cache_detail *cd) 1400 struct cache_detail *cd)
1304{ 1401{
1305 char tbuf[20]; 1402 char tbuf[20];
1306 char *ep; 1403 char *bp, *ep;
1307 long flushtime; 1404
1308 if (*ppos || count > sizeof(tbuf)-1) 1405 if (*ppos || count > sizeof(tbuf)-1)
1309 return -EINVAL; 1406 return -EINVAL;
1310 if (copy_from_user(tbuf, buf, count)) 1407 if (copy_from_user(tbuf, buf, count))
1311 return -EFAULT; 1408 return -EFAULT;
1312 tbuf[count] = 0; 1409 tbuf[count] = 0;
1313 flushtime = simple_strtoul(tbuf, &ep, 0); 1410 simple_strtoul(tbuf, &ep, 0);
1314 if (*ep && *ep != '\n') 1411 if (*ep && *ep != '\n')
1315 return -EINVAL; 1412 return -EINVAL;
1316 1413
1317 cd->flush_time = flushtime; 1414 bp = tbuf;
1318 cd->nextcheck = get_seconds(); 1415 cd->flush_time = get_expiry(&bp);
1416 cd->nextcheck = seconds_since_boot();
1319 cache_flush(); 1417 cache_flush();
1320 1418
1321 *ppos += count; 1419 *ppos += count;
@@ -1348,15 +1446,10 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
1348static long cache_ioctl_procfs(struct file *filp, 1446static long cache_ioctl_procfs(struct file *filp,
1349 unsigned int cmd, unsigned long arg) 1447 unsigned int cmd, unsigned long arg)
1350{ 1448{
1351 long ret;
1352 struct inode *inode = filp->f_path.dentry->d_inode; 1449 struct inode *inode = filp->f_path.dentry->d_inode;
1353 struct cache_detail *cd = PDE(inode)->data; 1450 struct cache_detail *cd = PDE(inode)->data;
1354 1451
1355 lock_kernel(); 1452 return cache_ioctl(inode, filp, cmd, arg, cd);
1356 ret = cache_ioctl(inode, filp, cmd, arg, cd);
1357 unlock_kernel();
1358
1359 return ret;
1360} 1453}
1361 1454
1362static int cache_open_procfs(struct inode *inode, struct file *filp) 1455static int cache_open_procfs(struct inode *inode, struct file *filp)
@@ -1441,10 +1534,13 @@ static const struct file_operations cache_flush_operations_procfs = {
1441 .read = read_flush_procfs, 1534 .read = read_flush_procfs,
1442 .write = write_flush_procfs, 1535 .write = write_flush_procfs,
1443 .release = release_flush_procfs, 1536 .release = release_flush_procfs,
1537 .llseek = no_llseek,
1444}; 1538};
1445 1539
1446static void remove_cache_proc_entries(struct cache_detail *cd) 1540static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
1447{ 1541{
1542 struct sunrpc_net *sn;
1543
1448 if (cd->u.procfs.proc_ent == NULL) 1544 if (cd->u.procfs.proc_ent == NULL)
1449 return; 1545 return;
1450 if (cd->u.procfs.flush_ent) 1546 if (cd->u.procfs.flush_ent)
@@ -1454,15 +1550,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
1454 if (cd->u.procfs.content_ent) 1550 if (cd->u.procfs.content_ent)
1455 remove_proc_entry("content", cd->u.procfs.proc_ent); 1551 remove_proc_entry("content", cd->u.procfs.proc_ent);
1456 cd->u.procfs.proc_ent = NULL; 1552 cd->u.procfs.proc_ent = NULL;
1457 remove_proc_entry(cd->name, proc_net_rpc); 1553 sn = net_generic(net, sunrpc_net_id);
1554 remove_proc_entry(cd->name, sn->proc_net_rpc);
1458} 1555}
1459 1556
1460#ifdef CONFIG_PROC_FS 1557#ifdef CONFIG_PROC_FS
1461static int create_cache_proc_entries(struct cache_detail *cd) 1558static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1462{ 1559{
1463 struct proc_dir_entry *p; 1560 struct proc_dir_entry *p;
1561 struct sunrpc_net *sn;
1464 1562
1465 cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); 1563 sn = net_generic(net, sunrpc_net_id);
1564 cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
1466 if (cd->u.procfs.proc_ent == NULL) 1565 if (cd->u.procfs.proc_ent == NULL)
1467 goto out_nomem; 1566 goto out_nomem;
1468 cd->u.procfs.channel_ent = NULL; 1567 cd->u.procfs.channel_ent = NULL;
@@ -1493,11 +1592,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
1493 } 1592 }
1494 return 0; 1593 return 0;
1495out_nomem: 1594out_nomem:
1496 remove_cache_proc_entries(cd); 1595 remove_cache_proc_entries(cd, net);
1497 return -ENOMEM; 1596 return -ENOMEM;
1498} 1597}
1499#else /* CONFIG_PROC_FS */ 1598#else /* CONFIG_PROC_FS */
1500static int create_cache_proc_entries(struct cache_detail *cd) 1599static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1501{ 1600{
1502 return 0; 1601 return 0;
1503} 1602}
@@ -1508,23 +1607,33 @@ void __init cache_initialize(void)
1508 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); 1607 INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
1509} 1608}
1510 1609
1511int cache_register(struct cache_detail *cd) 1610int cache_register_net(struct cache_detail *cd, struct net *net)
1512{ 1611{
1513 int ret; 1612 int ret;
1514 1613
1515 sunrpc_init_cache_detail(cd); 1614 sunrpc_init_cache_detail(cd);
1516 ret = create_cache_proc_entries(cd); 1615 ret = create_cache_proc_entries(cd, net);
1517 if (ret) 1616 if (ret)
1518 sunrpc_destroy_cache_detail(cd); 1617 sunrpc_destroy_cache_detail(cd);
1519 return ret; 1618 return ret;
1520} 1619}
1620
1621int cache_register(struct cache_detail *cd)
1622{
1623 return cache_register_net(cd, &init_net);
1624}
1521EXPORT_SYMBOL_GPL(cache_register); 1625EXPORT_SYMBOL_GPL(cache_register);
1522 1626
1523void cache_unregister(struct cache_detail *cd) 1627void cache_unregister_net(struct cache_detail *cd, struct net *net)
1524{ 1628{
1525 remove_cache_proc_entries(cd); 1629 remove_cache_proc_entries(cd, net);
1526 sunrpc_destroy_cache_detail(cd); 1630 sunrpc_destroy_cache_detail(cd);
1527} 1631}
1632
1633void cache_unregister(struct cache_detail *cd)
1634{
1635 cache_unregister_net(cd, &init_net);
1636}
1528EXPORT_SYMBOL_GPL(cache_unregister); 1637EXPORT_SYMBOL_GPL(cache_unregister);
1529 1638
1530static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, 1639static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
@@ -1555,13 +1664,8 @@ static long cache_ioctl_pipefs(struct file *filp,
1555{ 1664{
1556 struct inode *inode = filp->f_dentry->d_inode; 1665 struct inode *inode = filp->f_dentry->d_inode;
1557 struct cache_detail *cd = RPC_I(inode)->private; 1666 struct cache_detail *cd = RPC_I(inode)->private;
1558 long ret;
1559
1560 lock_kernel();
1561 ret = cache_ioctl(inode, filp, cmd, arg, cd);
1562 unlock_kernel();
1563 1667
1564 return ret; 1668 return cache_ioctl(inode, filp, cmd, arg, cd);
1565} 1669}
1566 1670
1567static int cache_open_pipefs(struct inode *inode, struct file *filp) 1671static int cache_open_pipefs(struct inode *inode, struct file *filp)
@@ -1646,6 +1750,7 @@ const struct file_operations cache_flush_operations_pipefs = {
1646 .read = read_flush_pipefs, 1750 .read = read_flush_pipefs,
1647 .write = write_flush_pipefs, 1751 .write = write_flush_pipefs,
1648 .release = release_flush_pipefs, 1752 .release = release_flush_pipefs,
1753 .llseek = no_llseek,
1649}; 1754};
1650 1755
1651int sunrpc_cache_register_pipefs(struct dentry *parent, 1756int sunrpc_cache_register_pipefs(struct dentry *parent,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa5549079d79..9dab9573be41 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -284,6 +284,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
284 struct rpc_xprt *xprt; 284 struct rpc_xprt *xprt;
285 struct rpc_clnt *clnt; 285 struct rpc_clnt *clnt;
286 struct xprt_create xprtargs = { 286 struct xprt_create xprtargs = {
287 .net = args->net,
287 .ident = args->protocol, 288 .ident = args->protocol,
288 .srcaddr = args->saddress, 289 .srcaddr = args->saddress,
289 .dstaddr = args->address, 290 .dstaddr = args->address,
@@ -1675,7 +1676,7 @@ rpc_verify_header(struct rpc_task *task)
1675 rpcauth_invalcred(task); 1676 rpcauth_invalcred(task);
1676 /* Ensure we obtain a new XID! */ 1677 /* Ensure we obtain a new XID! */
1677 xprt_release(task); 1678 xprt_release(task);
1678 task->tk_action = call_refresh; 1679 task->tk_action = call_reserve;
1679 goto out_retry; 1680 goto out_retry;
1680 case RPC_AUTH_BADCRED: 1681 case RPC_AUTH_BADCRED:
1681 case RPC_AUTH_BADVERF: 1682 case RPC_AUTH_BADVERF:
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
new file mode 100644
index 000000000000..d013bf211cae
--- /dev/null
+++ b/net/sunrpc/netns.h
@@ -0,0 +1,19 @@
1#ifndef __SUNRPC_NETNS_H__
2#define __SUNRPC_NETNS_H__
3
4#include <net/net_namespace.h>
5#include <net/netns/generic.h>
6
7struct cache_detail;
8
9struct sunrpc_net {
10 struct proc_dir_entry *proc_net_rpc;
11 struct cache_detail *ip_map_cache;
12};
13
14extern int sunrpc_net_id;
15
16int ip_map_cache_create(struct net *);
17void ip_map_cache_destroy(struct net *);
18
19#endif
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 8c8eef2b8f26..10a17a37ec4e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -27,9 +27,8 @@
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/sunrpc/rpc_pipe_fs.h> 28#include <linux/sunrpc/rpc_pipe_fs.h>
29#include <linux/sunrpc/cache.h> 29#include <linux/sunrpc/cache.h>
30#include <linux/smp_lock.h>
31 30
32static struct vfsmount *rpc_mount __read_mostly; 31static struct vfsmount *rpc_mnt __read_mostly;
33static int rpc_mount_count; 32static int rpc_mount_count;
34 33
35static struct file_system_type rpc_pipe_fs_type; 34static struct file_system_type rpc_pipe_fs_type;
@@ -204,7 +203,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
204 mutex_lock(&inode->i_mutex); 203 mutex_lock(&inode->i_mutex);
205 if (rpci->ops == NULL) 204 if (rpci->ops == NULL)
206 goto out; 205 goto out;
207 msg = (struct rpc_pipe_msg *)filp->private_data; 206 msg = filp->private_data;
208 if (msg != NULL) { 207 if (msg != NULL) {
209 spin_lock(&inode->i_lock); 208 spin_lock(&inode->i_lock);
210 msg->errno = -EAGAIN; 209 msg->errno = -EAGAIN;
@@ -309,40 +308,33 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
309 return mask; 308 return mask;
310} 309}
311 310
312static int 311static long
313rpc_pipe_ioctl_unlocked(struct file *filp, unsigned int cmd, unsigned long arg) 312rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
314{ 313{
315 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); 314 struct inode *inode = filp->f_path.dentry->d_inode;
315 struct rpc_inode *rpci = RPC_I(inode);
316 int len; 316 int len;
317 317
318 switch (cmd) { 318 switch (cmd) {
319 case FIONREAD: 319 case FIONREAD:
320 if (rpci->ops == NULL) 320 spin_lock(&inode->i_lock);
321 if (rpci->ops == NULL) {
322 spin_unlock(&inode->i_lock);
321 return -EPIPE; 323 return -EPIPE;
324 }
322 len = rpci->pipelen; 325 len = rpci->pipelen;
323 if (filp->private_data) { 326 if (filp->private_data) {
324 struct rpc_pipe_msg *msg; 327 struct rpc_pipe_msg *msg;
325 msg = (struct rpc_pipe_msg *)filp->private_data; 328 msg = filp->private_data;
326 len += msg->len - msg->copied; 329 len += msg->len - msg->copied;
327 } 330 }
331 spin_unlock(&inode->i_lock);
328 return put_user(len, (int __user *)arg); 332 return put_user(len, (int __user *)arg);
329 default: 333 default:
330 return -EINVAL; 334 return -EINVAL;
331 } 335 }
332} 336}
333 337
334static long
335rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
336{
337 long ret;
338
339 lock_kernel();
340 ret = rpc_pipe_ioctl_unlocked(filp, cmd, arg);
341 unlock_kernel();
342
343 return ret;
344}
345
346static const struct file_operations rpc_pipe_fops = { 338static const struct file_operations rpc_pipe_fops = {
347 .owner = THIS_MODULE, 339 .owner = THIS_MODULE,
348 .llseek = no_llseek, 340 .llseek = no_llseek,
@@ -425,16 +417,16 @@ struct vfsmount *rpc_get_mount(void)
425{ 417{
426 int err; 418 int err;
427 419
428 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); 420 err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mnt, &rpc_mount_count);
429 if (err != 0) 421 if (err != 0)
430 return ERR_PTR(err); 422 return ERR_PTR(err);
431 return rpc_mount; 423 return rpc_mnt;
432} 424}
433EXPORT_SYMBOL_GPL(rpc_get_mount); 425EXPORT_SYMBOL_GPL(rpc_get_mount);
434 426
435void rpc_put_mount(void) 427void rpc_put_mount(void)
436{ 428{
437 simple_release_fs(&rpc_mount, &rpc_mount_count); 429 simple_release_fs(&rpc_mnt, &rpc_mount_count);
438} 430}
439EXPORT_SYMBOL_GPL(rpc_put_mount); 431EXPORT_SYMBOL_GPL(rpc_put_mount);
440 432
@@ -453,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
453 struct inode *inode = new_inode(sb); 445 struct inode *inode = new_inode(sb);
454 if (!inode) 446 if (!inode)
455 return NULL; 447 return NULL;
448 inode->i_ino = get_next_ino();
456 inode->i_mode = mode; 449 inode->i_mode = mode;
457 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 450 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
458 switch(mode & S_IFMT) { 451 switch(mode & S_IFMT) {
@@ -1025,17 +1018,17 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1025 return 0; 1018 return 0;
1026} 1019}
1027 1020
1028static int 1021static struct dentry *
1029rpc_get_sb(struct file_system_type *fs_type, 1022rpc_mount(struct file_system_type *fs_type,
1030 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 1023 int flags, const char *dev_name, void *data)
1031{ 1024{
1032 return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); 1025 return mount_single(fs_type, flags, data, rpc_fill_super);
1033} 1026}
1034 1027
1035static struct file_system_type rpc_pipe_fs_type = { 1028static struct file_system_type rpc_pipe_fs_type = {
1036 .owner = THIS_MODULE, 1029 .owner = THIS_MODULE,
1037 .name = "rpc_pipefs", 1030 .name = "rpc_pipefs",
1038 .get_sb = rpc_get_sb, 1031 .mount = rpc_mount,
1039 .kill_sb = kill_litter_super, 1032 .kill_sb = kill_litter_super,
1040}; 1033};
1041 1034
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index dac219a56ae1..fa6d7ca2c851 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -177,6 +177,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex);
177static int rpcb_create_local(void) 177static int rpcb_create_local(void)
178{ 178{
179 struct rpc_create_args args = { 179 struct rpc_create_args args = {
180 .net = &init_net,
180 .protocol = XPRT_TRANSPORT_TCP, 181 .protocol = XPRT_TRANSPORT_TCP,
181 .address = (struct sockaddr *)&rpcb_inaddr_loopback, 182 .address = (struct sockaddr *)&rpcb_inaddr_loopback,
182 .addrsize = sizeof(rpcb_inaddr_loopback), 183 .addrsize = sizeof(rpcb_inaddr_loopback),
@@ -211,8 +212,9 @@ static int rpcb_create_local(void)
211 */ 212 */
212 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); 213 clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
213 if (IS_ERR(clnt4)) { 214 if (IS_ERR(clnt4)) {
214 dprintk("RPC: failed to create local rpcbind v4 " 215 dprintk("RPC: failed to bind second program to "
215 "cleint (errno %ld).\n", PTR_ERR(clnt4)); 216 "rpcbind v4 client (errno %ld).\n",
217 PTR_ERR(clnt4));
216 clnt4 = NULL; 218 clnt4 = NULL;
217 } 219 }
218 220
@@ -228,6 +230,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
228 size_t salen, int proto, u32 version) 230 size_t salen, int proto, u32 version)
229{ 231{
230 struct rpc_create_args args = { 232 struct rpc_create_args args = {
233 .net = &init_net,
231 .protocol = proto, 234 .protocol = proto,
232 .address = srvaddr, 235 .address = srvaddr,
233 .addrsize = salen, 236 .addrsize = salen,
@@ -247,7 +250,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
247 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); 250 ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
248 break; 251 break;
249 default: 252 default:
250 return NULL; 253 return ERR_PTR(-EAFNOSUPPORT);
251 } 254 }
252 255
253 return rpc_create(&args); 256 return rpc_create(&args);
@@ -475,57 +478,6 @@ int rpcb_v4_register(const u32 program, const u32 version,
475 return -EAFNOSUPPORT; 478 return -EAFNOSUPPORT;
476} 479}
477 480
478/**
479 * rpcb_getport_sync - obtain the port for an RPC service on a given host
480 * @sin: address of remote peer
481 * @prog: RPC program number to bind
482 * @vers: RPC version number to bind
483 * @prot: transport protocol to use to make this request
484 *
485 * Return value is the requested advertised port number,
486 * or a negative errno value.
487 *
488 * Called from outside the RPC client in a synchronous task context.
489 * Uses default timeout parameters specified by underlying transport.
490 *
491 * XXX: Needs to support IPv6
492 */
493int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
494{
495 struct rpcbind_args map = {
496 .r_prog = prog,
497 .r_vers = vers,
498 .r_prot = prot,
499 .r_port = 0,
500 };
501 struct rpc_message msg = {
502 .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT],
503 .rpc_argp = &map,
504 .rpc_resp = &map,
505 };
506 struct rpc_clnt *rpcb_clnt;
507 int status;
508
509 dprintk("RPC: %s(%pI4, %u, %u, %d)\n",
510 __func__, &sin->sin_addr.s_addr, prog, vers, prot);
511
512 rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
513 sizeof(*sin), prot, RPCBVERS_2);
514 if (IS_ERR(rpcb_clnt))
515 return PTR_ERR(rpcb_clnt);
516
517 status = rpc_call_sync(rpcb_clnt, &msg, 0);
518 rpc_shutdown_client(rpcb_clnt);
519
520 if (status >= 0) {
521 if (map.r_port != 0)
522 return map.r_port;
523 status = -EACCES;
524 }
525 return status;
526}
527EXPORT_SYMBOL_GPL(rpcb_getport_sync);
528
529static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) 481static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
530{ 482{
531 struct rpc_message msg = { 483 struct rpc_message msg = {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cace6049e4a5..243fc09b164e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -376,7 +376,7 @@ int rpc_queue_empty(struct rpc_wait_queue *queue)
376 spin_lock_bh(&queue->lock); 376 spin_lock_bh(&queue->lock);
377 res = queue->qlen; 377 res = queue->qlen;
378 spin_unlock_bh(&queue->lock); 378 spin_unlock_bh(&queue->lock);
379 return (res == 0); 379 return res == 0;
380} 380}
381EXPORT_SYMBOL_GPL(rpc_queue_empty); 381EXPORT_SYMBOL_GPL(rpc_queue_empty);
382 382
@@ -908,7 +908,7 @@ static int rpciod_start(void)
908 * Create the rpciod thread and wait for it to start. 908 * Create the rpciod thread and wait for it to start.
909 */ 909 */
910 dprintk("RPC: creating workqueue rpciod\n"); 910 dprintk("RPC: creating workqueue rpciod\n");
911 wq = create_workqueue("rpciod"); 911 wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
912 rpciod_workqueue = wq; 912 rpciod_workqueue = wq;
913 return rpciod_workqueue != NULL; 913 return rpciod_workqueue != NULL;
914} 914}
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index ea1046f3f9a3..f71a73107ae9 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -22,11 +22,10 @@
22#include <linux/sunrpc/clnt.h> 22#include <linux/sunrpc/clnt.h>
23#include <linux/sunrpc/svcsock.h> 23#include <linux/sunrpc/svcsock.h>
24#include <linux/sunrpc/metrics.h> 24#include <linux/sunrpc/metrics.h>
25#include <net/net_namespace.h>
26 25
27#define RPCDBG_FACILITY RPCDBG_MISC 26#include "netns.h"
28 27
29struct proc_dir_entry *proc_net_rpc = NULL; 28#define RPCDBG_FACILITY RPCDBG_MISC
30 29
31/* 30/*
32 * Get RPC client stats 31 * Get RPC client stats
@@ -218,10 +217,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
218static inline struct proc_dir_entry * 217static inline struct proc_dir_entry *
219do_register(const char *name, void *data, const struct file_operations *fops) 218do_register(const char *name, void *data, const struct file_operations *fops)
220{ 219{
221 rpc_proc_init(); 220 struct sunrpc_net *sn;
222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 221
224 return proc_create_data(name, 0, proc_net_rpc, fops, data); 222 dprintk("RPC: registering /proc/net/rpc/%s\n", name);
223 sn = net_generic(&init_net, sunrpc_net_id);
224 return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
225} 225}
226 226
227struct proc_dir_entry * 227struct proc_dir_entry *
@@ -234,7 +234,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register);
234void 234void
235rpc_proc_unregister(const char *name) 235rpc_proc_unregister(const char *name)
236{ 236{
237 remove_proc_entry(name, proc_net_rpc); 237 struct sunrpc_net *sn;
238
239 sn = net_generic(&init_net, sunrpc_net_id);
240 remove_proc_entry(name, sn->proc_net_rpc);
238} 241}
239EXPORT_SYMBOL_GPL(rpc_proc_unregister); 242EXPORT_SYMBOL_GPL(rpc_proc_unregister);
240 243
@@ -248,25 +251,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register);
248void 251void
249svc_proc_unregister(const char *name) 252svc_proc_unregister(const char *name)
250{ 253{
251 remove_proc_entry(name, proc_net_rpc); 254 struct sunrpc_net *sn;
255
256 sn = net_generic(&init_net, sunrpc_net_id);
257 remove_proc_entry(name, sn->proc_net_rpc);
252} 258}
253EXPORT_SYMBOL_GPL(svc_proc_unregister); 259EXPORT_SYMBOL_GPL(svc_proc_unregister);
254 260
255void 261int rpc_proc_init(struct net *net)
256rpc_proc_init(void)
257{ 262{
263 struct sunrpc_net *sn;
264
258 dprintk("RPC: registering /proc/net/rpc\n"); 265 dprintk("RPC: registering /proc/net/rpc\n");
259 if (!proc_net_rpc) 266 sn = net_generic(net, sunrpc_net_id);
260 proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); 267 sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
268 if (sn->proc_net_rpc == NULL)
269 return -ENOMEM;
270
271 return 0;
261} 272}
262 273
263void 274void rpc_proc_exit(struct net *net)
264rpc_proc_exit(void)
265{ 275{
266 dprintk("RPC: unregistering /proc/net/rpc\n"); 276 dprintk("RPC: unregistering /proc/net/rpc\n");
267 if (proc_net_rpc) { 277 remove_proc_entry("rpc", net->proc_net);
268 proc_net_rpc = NULL;
269 remove_proc_entry("rpc", init_net.proc_net);
270 }
271} 278}
272 279
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index c0d085013a2b..9d0809160994 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,7 +22,44 @@
22#include <linux/sunrpc/rpc_pipe_fs.h> 22#include <linux/sunrpc/rpc_pipe_fs.h>
23#include <linux/sunrpc/xprtsock.h> 23#include <linux/sunrpc/xprtsock.h>
24 24
25extern struct cache_detail ip_map_cache, unix_gid_cache; 25#include "netns.h"
26
27int sunrpc_net_id;
28
29static __net_init int sunrpc_init_net(struct net *net)
30{
31 int err;
32
33 err = rpc_proc_init(net);
34 if (err)
35 goto err_proc;
36
37 err = ip_map_cache_create(net);
38 if (err)
39 goto err_ipmap;
40
41 return 0;
42
43err_ipmap:
44 rpc_proc_exit(net);
45err_proc:
46 return err;
47}
48
49static __net_exit void sunrpc_exit_net(struct net *net)
50{
51 ip_map_cache_destroy(net);
52 rpc_proc_exit(net);
53}
54
55static struct pernet_operations sunrpc_net_ops = {
56 .init = sunrpc_init_net,
57 .exit = sunrpc_exit_net,
58 .id = &sunrpc_net_id,
59 .size = sizeof(struct sunrpc_net),
60};
61
62extern struct cache_detail unix_gid_cache;
26 63
27extern void cleanup_rpcb_clnt(void); 64extern void cleanup_rpcb_clnt(void);
28 65
@@ -38,18 +75,22 @@ init_sunrpc(void)
38 err = rpcauth_init_module(); 75 err = rpcauth_init_module();
39 if (err) 76 if (err)
40 goto out3; 77 goto out3;
78
79 cache_initialize();
80
81 err = register_pernet_subsys(&sunrpc_net_ops);
82 if (err)
83 goto out4;
41#ifdef RPC_DEBUG 84#ifdef RPC_DEBUG
42 rpc_register_sysctl(); 85 rpc_register_sysctl();
43#endif 86#endif
44#ifdef CONFIG_PROC_FS
45 rpc_proc_init();
46#endif
47 cache_initialize();
48 cache_register(&ip_map_cache);
49 cache_register(&unix_gid_cache); 87 cache_register(&unix_gid_cache);
50 svc_init_xprt_sock(); /* svc sock transport */ 88 svc_init_xprt_sock(); /* svc sock transport */
51 init_socket_xprt(); /* clnt sock transport */ 89 init_socket_xprt(); /* clnt sock transport */
52 return 0; 90 return 0;
91
92out4:
93 rpcauth_remove_module();
53out3: 94out3:
54 rpc_destroy_mempool(); 95 rpc_destroy_mempool();
55out2: 96out2:
@@ -67,14 +108,11 @@ cleanup_sunrpc(void)
67 svc_cleanup_xprt_sock(); 108 svc_cleanup_xprt_sock();
68 unregister_rpc_pipefs(); 109 unregister_rpc_pipefs();
69 rpc_destroy_mempool(); 110 rpc_destroy_mempool();
70 cache_unregister(&ip_map_cache);
71 cache_unregister(&unix_gid_cache); 111 cache_unregister(&unix_gid_cache);
112 unregister_pernet_subsys(&sunrpc_net_ops);
72#ifdef RPC_DEBUG 113#ifdef RPC_DEBUG
73 rpc_unregister_sysctl(); 114 rpc_unregister_sysctl();
74#endif 115#endif
75#ifdef CONFIG_PROC_FS
76 rpc_proc_exit();
77#endif
78 rcu_barrier(); /* Wait for completion of call_rcu()'s */ 116 rcu_barrier(); /* Wait for completion of call_rcu()'s */
79} 117}
80MODULE_LICENSE("GPL"); 118MODULE_LICENSE("GPL");
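The pernet boilerplate above follows the standard pattern: register_pernet_subsys() allocates .size bytes of zeroed state for every struct net, current and future, and records the slot index in sunrpc_net_id; every later lookup is a net_generic() call. The accessor idiom used across this series, as a sketch (the helper name here is hypothetical):

        #include <net/net_namespace.h>
        #include <net/netns/generic.h>

        static inline struct sunrpc_net *sunrpc_pernet(struct net *net)
        {
                /* only valid once register_pernet_subsys(&sunrpc_net_ops)
                 * has assigned sunrpc_net_id */
                return net_generic(net, sunrpc_net_id);
        }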
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d9017d64597e..6359c42c4941 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1055,6 +1055,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
1055 goto err_bad; 1055 goto err_bad;
1056 case SVC_DENIED: 1056 case SVC_DENIED:
1057 goto err_bad_auth; 1057 goto err_bad_auth;
1058 case SVC_CLOSE:
1059 if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
1060 svc_close_xprt(rqstp->rq_xprt);
1058 case SVC_DROP: 1061 case SVC_DROP:
1059 goto dropit; 1062 goto dropit;
1060 case SVC_COMPLETE: 1063 case SVC_COMPLETE:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cbc084939dd8..c82fe739fbdc 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -100,16 +100,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
100 */ 100 */
101int svc_print_xprts(char *buf, int maxlen) 101int svc_print_xprts(char *buf, int maxlen)
102{ 102{
103 struct list_head *le; 103 struct svc_xprt_class *xcl;
104 char tmpstr[80]; 104 char tmpstr[80];
105 int len = 0; 105 int len = 0;
106 buf[0] = '\0'; 106 buf[0] = '\0';
107 107
108 spin_lock(&svc_xprt_class_lock); 108 spin_lock(&svc_xprt_class_lock);
109 list_for_each(le, &svc_xprt_class_list) { 109 list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
110 int slen; 110 int slen;
111 struct svc_xprt_class *xcl =
112 list_entry(le, struct svc_xprt_class, xcl_list);
113 111
114 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); 112 sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
115 slen = strlen(tmpstr); 113 slen = strlen(tmpstr);
@@ -128,9 +126,9 @@ static void svc_xprt_free(struct kref *kref)
128 struct svc_xprt *xprt = 126 struct svc_xprt *xprt =
129 container_of(kref, struct svc_xprt, xpt_ref); 127 container_of(kref, struct svc_xprt, xpt_ref);
130 struct module *owner = xprt->xpt_class->xcl_owner; 128 struct module *owner = xprt->xpt_class->xcl_owner;
131 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && 129 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
132 xprt->xpt_auth_cache != NULL) 130 svcauth_unix_info_release(xprt);
133 svcauth_unix_info_release(xprt->xpt_auth_cache); 131 put_net(xprt->xpt_net);
134 xprt->xpt_ops->xpo_free(xprt); 132 xprt->xpt_ops->xpo_free(xprt);
135 module_put(owner); 133 module_put(owner);
136} 134}
@@ -156,15 +154,18 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
156 INIT_LIST_HEAD(&xprt->xpt_list); 154 INIT_LIST_HEAD(&xprt->xpt_list);
157 INIT_LIST_HEAD(&xprt->xpt_ready); 155 INIT_LIST_HEAD(&xprt->xpt_ready);
158 INIT_LIST_HEAD(&xprt->xpt_deferred); 156 INIT_LIST_HEAD(&xprt->xpt_deferred);
157 INIT_LIST_HEAD(&xprt->xpt_users);
159 mutex_init(&xprt->xpt_mutex); 158 mutex_init(&xprt->xpt_mutex);
160 spin_lock_init(&xprt->xpt_lock); 159 spin_lock_init(&xprt->xpt_lock);
161 set_bit(XPT_BUSY, &xprt->xpt_flags); 160 set_bit(XPT_BUSY, &xprt->xpt_flags);
162 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending"); 161 rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
162 xprt->xpt_net = get_net(&init_net);
163} 163}
164EXPORT_SYMBOL_GPL(svc_xprt_init); 164EXPORT_SYMBOL_GPL(svc_xprt_init);
165 165
166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 166static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
167 struct svc_serv *serv, 167 struct svc_serv *serv,
168 struct net *net,
168 const int family, 169 const int family,
169 const unsigned short port, 170 const unsigned short port,
170 int flags) 171 int flags)
@@ -199,12 +200,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
199 return ERR_PTR(-EAFNOSUPPORT); 200 return ERR_PTR(-EAFNOSUPPORT);
200 } 201 }
201 202
202 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 203 return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
203} 204}
204 205
205int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, 206int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
206 const int family, const unsigned short port, 207 struct net *net, const int family,
207 int flags) 208 const unsigned short port, int flags)
208{ 209{
209 struct svc_xprt_class *xcl; 210 struct svc_xprt_class *xcl;
210 211
@@ -220,7 +221,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
220 goto err; 221 goto err;
221 222
222 spin_unlock(&svc_xprt_class_lock); 223 spin_unlock(&svc_xprt_class_lock);
223 newxprt = __svc_xpo_create(xcl, serv, family, port, flags); 224 newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
224 if (IS_ERR(newxprt)) { 225 if (IS_ERR(newxprt)) {
225 module_put(xcl->xcl_owner); 226 module_put(xcl->xcl_owner);
226 return PTR_ERR(newxprt); 227 return PTR_ERR(newxprt);
@@ -329,12 +330,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
329 "svc_xprt_enqueue: " 330 "svc_xprt_enqueue: "
330 "threads and transports both waiting??\n"); 331 "threads and transports both waiting??\n");
331 332
332 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
333 /* Don't enqueue dead transports */
334 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
335 goto out_unlock;
336 }
337
338 pool->sp_stats.packets++; 333 pool->sp_stats.packets++;
339 334
340 /* Mark transport as busy. It will remain in this state until 335 /* Mark transport as busy. It will remain in this state until
@@ -651,6 +646,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
651 if (signalled() || kthread_should_stop()) 646 if (signalled() || kthread_should_stop())
652 return -EINTR; 647 return -EINTR;
653 648
649 /* Normally we will wait up to 5 seconds for any required
650 * cache information to be provided.
651 */
652 rqstp->rq_chandle.thread_wait = 5*HZ;
653
654 spin_lock_bh(&pool->sp_lock); 654 spin_lock_bh(&pool->sp_lock);
655 xprt = svc_xprt_dequeue(pool); 655 xprt = svc_xprt_dequeue(pool);
656 if (xprt) { 656 if (xprt) {
@@ -658,6 +658,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
658 svc_xprt_get(xprt); 658 svc_xprt_get(xprt);
659 rqstp->rq_reserved = serv->sv_max_mesg; 659 rqstp->rq_reserved = serv->sv_max_mesg;
660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 660 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
661
662 /* As there is a shortage of threads and this request
663 * had to be queued, don't allow the thread to wait so
664 * long for cache updates.
665 */
666 rqstp->rq_chandle.thread_wait = 1*HZ;
661 } else { 667 } else {
662 /* No data pending. Go to sleep */ 668 /* No data pending. Go to sleep */
663 svc_thread_enqueue(pool, rqstp); 669 svc_thread_enqueue(pool, rqstp);
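These two assignments are the producer side of the wait budget consumed in net/sunrpc/cache.c above; connecting the pieces as a comment-level sketch:

        /*
         * svc_recv():         rqstp->rq_chandle.thread_wait = 5*HZ;  (idle)
         *                     rqstp->rq_chandle.thread_wait = 1*HZ;  (busy pool)
         * cache_defer_req():  if (req->thread_wait)
         *                             cache_wait_req(req, item);
         *
         * i.e. a thread blocks briefly for the upcall result instead of
         * deferring or dropping the request, and blocks for less time
         * when the pool is already backed up.
         */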
@@ -868,6 +874,19 @@ static void svc_age_temp_xprts(unsigned long closure)
868 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); 874 mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
869} 875}
870 876
877static void call_xpt_users(struct svc_xprt *xprt)
878{
879 struct svc_xpt_user *u;
880
881 spin_lock(&xprt->xpt_lock);
882 while (!list_empty(&xprt->xpt_users)) {
883 u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
884 list_del(&u->list);
885 u->callback(u);
886 }
887 spin_unlock(&xprt->xpt_lock);
888}
889
871/* 890/*
872 * Remove a dead transport 891 * Remove a dead transport
873 */ 892 */
@@ -878,7 +897,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
878 897
879 /* Only do this once */ 898 /* Only do this once */
880 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) 899 if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
881 return; 900 BUG();
882 901
883 dprintk("svc: svc_delete_xprt(%p)\n", xprt); 902 dprintk("svc: svc_delete_xprt(%p)\n", xprt);
884 xprt->xpt_ops->xpo_detach(xprt); 903 xprt->xpt_ops->xpo_detach(xprt);
@@ -900,6 +919,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
900 while ((dr = svc_deferred_dequeue(xprt)) != NULL) 919 while ((dr = svc_deferred_dequeue(xprt)) != NULL)
901 kfree(dr); 920 kfree(dr);
902 921
922 call_xpt_users(xprt);
903 svc_xprt_put(xprt); 923 svc_xprt_put(xprt);
904} 924}
905 925
@@ -910,10 +930,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
910 /* someone else will have to effect the close */ 930 /* someone else will have to effect the close */
911 return; 931 return;
912 932
913 svc_xprt_get(xprt);
914 svc_delete_xprt(xprt); 933 svc_delete_xprt(xprt);
915 clear_bit(XPT_BUSY, &xprt->xpt_flags);
916 svc_xprt_put(xprt);
917} 934}
918EXPORT_SYMBOL_GPL(svc_close_xprt); 935EXPORT_SYMBOL_GPL(svc_close_xprt);
919 936
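Read together with the earlier hunks, this tightens the teardown protocol: svc_xprt_enqueue() drops its XPT_DEAD check, svc_delete_xprt() turns a second deletion into BUG(), and svc_close_xprt() no longer clears XPT_BUSY afterwards. The invariant, sketched under the assumption that every deleter first wins XPT_BUSY:

    /* sketch of the assumed caller protocol, not a drop-in function */
    static void close_xprt_once(struct svc_xprt *xprt)
    {
        if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
            return; /* someone else will effect the close */
        /*
         * Holding XPT_BUSY means svc_xprt_enqueue() cannot hand this
         * transport to a thread, so XPT_DEAD is set exactly once and
         * an enqueue-time dead check is unnecessary.
         */
        svc_delete_xprt(xprt); /* BUG()s if XPT_DEAD was already set */
    }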
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 207311610988..560677d187f1 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -18,6 +18,8 @@
18 18
19#include <linux/sunrpc/clnt.h> 19#include <linux/sunrpc/clnt.h>
20 20
21#include "netns.h"
22
21/* 23/*
22 * AUTHUNIX and AUTHNULL credentials are both handled here. 24 * AUTHUNIX and AUTHNULL credentials are both handled here.
23 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid 25 * AUTHNULL is treated just like AUTHUNIX except that the uid/gid
@@ -92,7 +94,6 @@ struct ip_map {
92 struct unix_domain *m_client; 94 struct unix_domain *m_client;
93 int m_add_change; 95 int m_add_change;
94}; 96};
95static struct cache_head *ip_table[IP_HASHMAX];
96 97
97static void ip_map_put(struct kref *kref) 98static void ip_map_put(struct kref *kref)
98{ 99{
@@ -178,8 +179,8 @@ static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
178 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); 179 return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
179} 180}
180 181
181static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); 182static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
182static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); 183static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
183 184
184static int ip_map_parse(struct cache_detail *cd, 185static int ip_map_parse(struct cache_detail *cd,
185 char *mesg, int mlen) 186 char *mesg, int mlen)
@@ -219,10 +220,9 @@ static int ip_map_parse(struct cache_detail *cd,
219 switch (address.sa.sa_family) { 220 switch (address.sa.sa_family) {
220 case AF_INET: 221 case AF_INET:
221 /* Form a mapped IPv4 address in sin6 */ 222 /* Form a mapped IPv4 address in sin6 */
222 memset(&sin6, 0, sizeof(sin6));
223 sin6.sin6_family = AF_INET6; 223 sin6.sin6_family = AF_INET6;
224 sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); 224 ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
225 sin6.sin6_addr.s6_addr32[3] = address.s4.sin_addr.s_addr; 225 &sin6.sin6_addr);
226 break; 226 break;
227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 227#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
228 case AF_INET6: 228 case AF_INET6:
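ipv6_addr_set_v4mapped() covers both the open-coded word stores and, as far as the address itself goes, the deleted memset(), since it also zeroes the top two 32-bit words. Its effect, in miniature:

    /* equivalent of ipv6_addr_set_v4mapped(v4, &a6): builds ::ffff:a.b.c.d */
    static void v4mapped(__be32 v4, struct in6_addr *a6)
    {
        a6->s6_addr32[0] = 0;
        a6->s6_addr32[1] = 0;
        a6->s6_addr32[2] = htonl(0xffff);
        a6->s6_addr32[3] = v4;
    }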
@@ -249,9 +249,9 @@ static int ip_map_parse(struct cache_detail *cd,
249 dom = NULL; 249 dom = NULL;
250 250
251 /* IPv6 scope IDs are ignored for now */ 251 /* IPv6 scope IDs are ignored for now */
252 ipmp = ip_map_lookup(class, &sin6.sin6_addr); 252 ipmp = __ip_map_lookup(cd, class, &sin6.sin6_addr);
253 if (ipmp) { 253 if (ipmp) {
254 err = ip_map_update(ipmp, 254 err = __ip_map_update(cd, ipmp,
255 container_of(dom, struct unix_domain, h), 255 container_of(dom, struct unix_domain, h),
256 expiry); 256 expiry);
257 } else 257 } else
@@ -294,29 +294,15 @@ static int ip_map_show(struct seq_file *m,
294} 294}
295 295
296 296
297struct cache_detail ip_map_cache = { 297static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
298 .owner = THIS_MODULE, 298 struct in6_addr *addr)
299 .hash_size = IP_HASHMAX,
300 .hash_table = ip_table,
301 .name = "auth.unix.ip",
302 .cache_put = ip_map_put,
303 .cache_upcall = ip_map_upcall,
304 .cache_parse = ip_map_parse,
305 .cache_show = ip_map_show,
306 .match = ip_map_match,
307 .init = ip_map_init,
308 .update = update,
309 .alloc = ip_map_alloc,
310};
311
312static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
313{ 299{
314 struct ip_map ip; 300 struct ip_map ip;
315 struct cache_head *ch; 301 struct cache_head *ch;
316 302
317 strcpy(ip.m_class, class); 303 strcpy(ip.m_class, class);
318 ipv6_addr_copy(&ip.m_addr, addr); 304 ipv6_addr_copy(&ip.m_addr, addr);
319 ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h, 305 ch = sunrpc_cache_lookup(cd, &ip.h,
320 hash_str(class, IP_HASHBITS) ^ 306 hash_str(class, IP_HASHBITS) ^
321 hash_ip6(*addr)); 307 hash_ip6(*addr));
322 308
@@ -326,7 +312,17 @@ static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
326 return NULL; 312 return NULL;
327} 313}
328 314
329static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry) 315static inline struct ip_map *ip_map_lookup(struct net *net, char *class,
316 struct in6_addr *addr)
317{
318 struct sunrpc_net *sn;
319
320 sn = net_generic(net, sunrpc_net_id);
321 return __ip_map_lookup(sn->ip_map_cache, class, addr);
322}
323
324static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
325 struct unix_domain *udom, time_t expiry)
330{ 326{
331 struct ip_map ip; 327 struct ip_map ip;
332 struct cache_head *ch; 328 struct cache_head *ch;
@@ -344,17 +340,25 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
344 ip.m_add_change++; 340 ip.m_add_change++;
345 } 341 }
346 ip.h.expiry_time = expiry; 342 ip.h.expiry_time = expiry;
347 ch = sunrpc_cache_update(&ip_map_cache, 343 ch = sunrpc_cache_update(cd, &ip.h, &ipm->h,
348 &ip.h, &ipm->h,
349 hash_str(ipm->m_class, IP_HASHBITS) ^ 344 hash_str(ipm->m_class, IP_HASHBITS) ^
350 hash_ip6(ipm->m_addr)); 345 hash_ip6(ipm->m_addr));
351 if (!ch) 346 if (!ch)
352 return -ENOMEM; 347 return -ENOMEM;
353 cache_put(ch, &ip_map_cache); 348 cache_put(ch, cd);
354 return 0; 349 return 0;
355} 350}
356 351
357int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) 352static inline int ip_map_update(struct net *net, struct ip_map *ipm,
353 struct unix_domain *udom, time_t expiry)
354{
355 struct sunrpc_net *sn;
356
357 sn = net_generic(net, sunrpc_net_id);
358 return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
359}
360
361int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom)
358{ 362{
359 struct unix_domain *udom; 363 struct unix_domain *udom;
360 struct ip_map *ipmp; 364 struct ip_map *ipmp;
@@ -362,10 +366,10 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
362 if (dom->flavour != &svcauth_unix) 366 if (dom->flavour != &svcauth_unix)
363 return -EINVAL; 367 return -EINVAL;
364 udom = container_of(dom, struct unix_domain, h); 368 udom = container_of(dom, struct unix_domain, h);
365 ipmp = ip_map_lookup("nfsd", addr); 369 ipmp = ip_map_lookup(net, "nfsd", addr);
366 370
367 if (ipmp) 371 if (ipmp)
368 return ip_map_update(ipmp, udom, NEVER); 372 return ip_map_update(net, ipmp, udom, NEVER);
369 else 373 else
370 return -ENOMEM; 374 return -ENOMEM;
371} 375}
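The new ip_map_lookup()/ip_map_update() wrappers are the whole per-net conversion in this file: map the struct net to this module's private state with net_generic(), then run the old code against the per-net cache_detail. Reduced to its essence:

    /* the per-net indirection used by both wrappers above */
    static struct cache_detail *ip_map_cache_of(struct net *net)
    {
        struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);

        return sn->ip_map_cache;
    }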
@@ -383,16 +387,18 @@ int auth_unix_forget_old(struct auth_domain *dom)
383} 387}
384EXPORT_SYMBOL_GPL(auth_unix_forget_old); 388EXPORT_SYMBOL_GPL(auth_unix_forget_old);
385 389
386struct auth_domain *auth_unix_lookup(struct in6_addr *addr) 390struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr)
387{ 391{
388 struct ip_map *ipm; 392 struct ip_map *ipm;
389 struct auth_domain *rv; 393 struct auth_domain *rv;
394 struct sunrpc_net *sn;
390 395
391 ipm = ip_map_lookup("nfsd", addr); 396 sn = net_generic(net, sunrpc_net_id);
397 ipm = ip_map_lookup(net, "nfsd", addr);
392 398
393 if (!ipm) 399 if (!ipm)
394 return NULL; 400 return NULL;
395 if (cache_check(&ip_map_cache, &ipm->h, NULL)) 401 if (cache_check(sn->ip_map_cache, &ipm->h, NULL))
396 return NULL; 402 return NULL;
397 403
398 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { 404 if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) {
@@ -403,22 +409,29 @@ struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
403 rv = &ipm->m_client->h; 409 rv = &ipm->m_client->h;
404 kref_get(&rv->ref); 410 kref_get(&rv->ref);
405 } 411 }
406 cache_put(&ipm->h, &ip_map_cache); 412 cache_put(&ipm->h, sn->ip_map_cache);
407 return rv; 413 return rv;
408} 414}
409EXPORT_SYMBOL_GPL(auth_unix_lookup); 415EXPORT_SYMBOL_GPL(auth_unix_lookup);
410 416
411void svcauth_unix_purge(void) 417void svcauth_unix_purge(void)
412{ 418{
413 cache_purge(&ip_map_cache); 419 struct net *net;
420
421 for_each_net(net) {
422 struct sunrpc_net *sn;
423
424 sn = net_generic(net, sunrpc_net_id);
425 cache_purge(sn->ip_map_cache);
426 }
414} 427}
415EXPORT_SYMBOL_GPL(svcauth_unix_purge); 428EXPORT_SYMBOL_GPL(svcauth_unix_purge);
416 429
417static inline struct ip_map * 430static inline struct ip_map *
418ip_map_cached_get(struct svc_rqst *rqstp) 431ip_map_cached_get(struct svc_xprt *xprt)
419{ 432{
420 struct ip_map *ipm = NULL; 433 struct ip_map *ipm = NULL;
421 struct svc_xprt *xprt = rqstp->rq_xprt; 434 struct sunrpc_net *sn;
422 435
423 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 436 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
424 spin_lock(&xprt->xpt_lock); 437 spin_lock(&xprt->xpt_lock);
@@ -430,9 +443,10 @@ ip_map_cached_get(struct svc_rqst *rqstp)
430 * remembered, e.g. by a second mount from the 443 * remembered, e.g. by a second mount from the
431 * same IP address. 444 * same IP address.
432 */ 445 */
446 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
433 xprt->xpt_auth_cache = NULL; 447 xprt->xpt_auth_cache = NULL;
434 spin_unlock(&xprt->xpt_lock); 448 spin_unlock(&xprt->xpt_lock);
435 cache_put(&ipm->h, &ip_map_cache); 449 cache_put(&ipm->h, sn->ip_map_cache);
436 return NULL; 450 return NULL;
437 } 451 }
438 cache_get(&ipm->h); 452 cache_get(&ipm->h);
@@ -443,10 +457,8 @@ ip_map_cached_get(struct svc_rqst *rqstp)
443} 457}
444 458
445static inline void 459static inline void
446ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) 460ip_map_cached_put(struct svc_xprt *xprt, struct ip_map *ipm)
447{ 461{
448 struct svc_xprt *xprt = rqstp->rq_xprt;
449
450 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { 462 if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) {
451 spin_lock(&xprt->xpt_lock); 463 spin_lock(&xprt->xpt_lock);
452 if (xprt->xpt_auth_cache == NULL) { 464 if (xprt->xpt_auth_cache == NULL) {
@@ -456,15 +468,26 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
456 } 468 }
457 spin_unlock(&xprt->xpt_lock); 469 spin_unlock(&xprt->xpt_lock);
458 } 470 }
459 if (ipm) 471 if (ipm) {
460 cache_put(&ipm->h, &ip_map_cache); 472 struct sunrpc_net *sn;
473
474 sn = net_generic(xprt->xpt_net, sunrpc_net_id);
475 cache_put(&ipm->h, sn->ip_map_cache);
476 }
461} 477}
462 478
463void 479void
464svcauth_unix_info_release(void *info) 480svcauth_unix_info_release(struct svc_xprt *xpt)
465{ 481{
466 struct ip_map *ipm = info; 482 struct ip_map *ipm;
467 cache_put(&ipm->h, &ip_map_cache); 483
484 ipm = xpt->xpt_auth_cache;
485 if (ipm != NULL) {
486 struct sunrpc_net *sn;
487
488 sn = net_generic(xpt->xpt_net, sunrpc_net_id);
489 cache_put(&ipm->h, sn->ip_map_cache);
490 }
468} 491}
469 492
470/**************************************************************************** 493/****************************************************************************
@@ -674,6 +697,8 @@ static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
674 switch (ret) { 697 switch (ret) {
675 case -ENOENT: 698 case -ENOENT:
676 return ERR_PTR(-ENOENT); 699 return ERR_PTR(-ENOENT);
700 case -ETIMEDOUT:
701 return ERR_PTR(-ESHUTDOWN);
677 case 0: 702 case 0:
678 gi = get_group_info(ug->gi); 703 gi = get_group_info(ug->gi);
679 cache_put(&ug->h, &unix_gid_cache); 704 cache_put(&ug->h, &unix_gid_cache);
@@ -691,6 +716,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
691 struct ip_map *ipm; 716 struct ip_map *ipm;
692 struct group_info *gi; 717 struct group_info *gi;
693 struct svc_cred *cred = &rqstp->rq_cred; 718 struct svc_cred *cred = &rqstp->rq_cred;
719 struct svc_xprt *xprt = rqstp->rq_xprt;
720 struct net *net = xprt->xpt_net;
721 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
694 722
695 switch (rqstp->rq_addr.ss_family) { 723 switch (rqstp->rq_addr.ss_family) {
696 case AF_INET: 724 case AF_INET:
@@ -709,26 +737,27 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
709 if (rqstp->rq_proc == 0) 737 if (rqstp->rq_proc == 0)
710 return SVC_OK; 738 return SVC_OK;
711 739
712 ipm = ip_map_cached_get(rqstp); 740 ipm = ip_map_cached_get(xprt);
713 if (ipm == NULL) 741 if (ipm == NULL)
714 ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, 742 ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
715 &sin6->sin6_addr); 743 &sin6->sin6_addr);
716 744
717 if (ipm == NULL) 745 if (ipm == NULL)
718 return SVC_DENIED; 746 return SVC_DENIED;
719 747
720 switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { 748 switch (cache_check(sn->ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
721 default: 749 default:
722 BUG(); 750 BUG();
723 case -EAGAIN:
724 case -ETIMEDOUT: 751 case -ETIMEDOUT:
752 return SVC_CLOSE;
753 case -EAGAIN:
725 return SVC_DROP; 754 return SVC_DROP;
726 case -ENOENT: 755 case -ENOENT:
727 return SVC_DENIED; 756 return SVC_DENIED;
728 case 0: 757 case 0:
729 rqstp->rq_client = &ipm->m_client->h; 758 rqstp->rq_client = &ipm->m_client->h;
730 kref_get(&rqstp->rq_client->ref); 759 kref_get(&rqstp->rq_client->ref);
731 ip_map_cached_put(rqstp, ipm); 760 ip_map_cached_put(xprt, ipm);
732 break; 761 break;
733 } 762 }
734 763
@@ -736,6 +765,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
736 switch (PTR_ERR(gi)) { 765 switch (PTR_ERR(gi)) {
737 case -EAGAIN: 766 case -EAGAIN:
738 return SVC_DROP; 767 return SVC_DROP;
768 case -ESHUTDOWN:
769 return SVC_CLOSE;
739 case -ENOENT: 770 case -ENOENT:
740 break; 771 break;
741 default: 772 default:
@@ -776,7 +807,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
776 cred->cr_gid = (gid_t) -1; 807 cred->cr_gid = (gid_t) -1;
777 cred->cr_group_info = groups_alloc(0); 808 cred->cr_group_info = groups_alloc(0);
778 if (cred->cr_group_info == NULL) 809 if (cred->cr_group_info == NULL)
779 return SVC_DROP; /* kmalloc failure - client must retry */ 810 return SVC_CLOSE; /* kmalloc failure - client must retry */
780 811
781 /* Put NULL verifier */ 812 /* Put NULL verifier */
782 svc_putnl(resv, RPC_AUTH_NULL); 813 svc_putnl(resv, RPC_AUTH_NULL);
@@ -840,7 +871,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
840 goto badcred; 871 goto badcred;
841 cred->cr_group_info = groups_alloc(slen); 872 cred->cr_group_info = groups_alloc(slen);
842 if (cred->cr_group_info == NULL) 873 if (cred->cr_group_info == NULL)
843 return SVC_DROP; 874 return SVC_CLOSE;
844 for (i = 0; i < slen; i++) 875 for (i = 0; i < slen; i++)
845 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv); 876 GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
846 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { 877 if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
@@ -886,3 +917,56 @@ struct auth_ops svcauth_unix = {
886 .set_client = svcauth_unix_set_client, 917 .set_client = svcauth_unix_set_client,
887}; 918};
888 919
920int ip_map_cache_create(struct net *net)
921{
922 int err = -ENOMEM;
923 struct cache_detail *cd;
924 struct cache_head **tbl;
925 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
926
927 cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
928 if (cd == NULL)
929 goto err_cd;
930
931 tbl = kzalloc(IP_HASHMAX * sizeof(struct cache_head *), GFP_KERNEL);
932 if (tbl == NULL)
933 goto err_tbl;
934
935	cd->owner = THIS_MODULE;
936	cd->hash_size = IP_HASHMAX;
937	cd->hash_table = tbl;
938	cd->name = "auth.unix.ip";
939	cd->cache_put = ip_map_put;
940	cd->cache_upcall = ip_map_upcall;
941	cd->cache_parse = ip_map_parse;
942	cd->cache_show = ip_map_show;
943	cd->match = ip_map_match;
944	cd->init = ip_map_init;
945	cd->update = update;
946	cd->alloc = ip_map_alloc;
947
948 err = cache_register_net(cd, net);
949 if (err)
950 goto err_reg;
951
952 sn->ip_map_cache = cd;
953 return 0;
954
955err_reg:
956 kfree(tbl);
957err_tbl:
958 kfree(cd);
959err_cd:
960 return err;
961}
962
963void ip_map_cache_destroy(struct net *net)
964{
965 struct sunrpc_net *sn;
966
967 sn = net_generic(net, sunrpc_net_id);
968 cache_purge(sn->ip_map_cache);
969 cache_unregister_net(sn->ip_map_cache, net);
970 kfree(sn->ip_map_cache->hash_table);
971 kfree(sn->ip_map_cache);
972}
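ip_map_cache_create()/ip_map_cache_destroy() have the shape of pernet init/exit hooks. A sketch of how they would typically be wired up; the actual sunrpc netns registration lives outside this diff, so the operations struct and call site below are assumptions:

    static __net_init int sunrpc_ip_map_init(struct net *net)
    {
        return ip_map_cache_create(net);
    }

    static __net_exit void sunrpc_ip_map_exit(struct net *net)
    {
        ip_map_cache_destroy(net);
    }

    static struct pernet_operations sunrpc_ip_map_ops = {
        .init = sunrpc_ip_map_init,
        .exit = sunrpc_ip_map_exit,
    };

    /* register_pernet_subsys(&sunrpc_ip_map_ops) from module init */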
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7e534dd09077..07919e16be3e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -64,7 +64,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *);
64static void svc_sock_free(struct svc_xprt *); 64static void svc_sock_free(struct svc_xprt *);
65 65
66static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 66static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
67 struct sockaddr *, int, int); 67 struct net *, struct sockaddr *,
68 int, int);
68#ifdef CONFIG_DEBUG_LOCK_ALLOC 69#ifdef CONFIG_DEBUG_LOCK_ALLOC
69static struct lock_class_key svc_key[2]; 70static struct lock_class_key svc_key[2];
70static struct lock_class_key svc_slock_key[2]; 71static struct lock_class_key svc_slock_key[2];
@@ -657,10 +658,11 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
657} 658}
658 659
659static struct svc_xprt *svc_udp_create(struct svc_serv *serv, 660static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
661 struct net *net,
660 struct sockaddr *sa, int salen, 662 struct sockaddr *sa, int salen,
661 int flags) 663 int flags)
662{ 664{
663 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); 665 return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
664} 666}
665 667
666static struct svc_xprt_ops svc_udp_ops = { 668static struct svc_xprt_ops svc_udp_ops = {
@@ -1133,9 +1135,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
1133 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 1135 reclen = htonl(0x80000000|((xbufp->len ) - 4));
1134 memcpy(xbufp->head[0].iov_base, &reclen, 4); 1136 memcpy(xbufp->head[0].iov_base, &reclen, 4);
1135 1137
1136 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags))
1137 return -ENOTCONN;
1138
1139 sent = svc_sendto(rqstp, &rqstp->rq_res); 1138 sent = svc_sendto(rqstp, &rqstp->rq_res);
1140 if (sent != xbufp->len) { 1139 if (sent != xbufp->len) {
1141 printk(KERN_NOTICE 1140 printk(KERN_NOTICE
@@ -1178,10 +1177,11 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
1178} 1177}
1179 1178
1180static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, 1179static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1180 struct net *net,
1181 struct sockaddr *sa, int salen, 1181 struct sockaddr *sa, int salen,
1182 int flags) 1182 int flags)
1183{ 1183{
1184 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); 1184 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1185} 1185}
1186 1186
1187static struct svc_xprt_ops svc_tcp_ops = { 1187static struct svc_xprt_ops svc_tcp_ops = {
@@ -1258,19 +1258,13 @@ void svc_sock_update_bufs(struct svc_serv *serv)
1258 * The number of server threads has changed. Update 1258 * The number of server threads has changed. Update
1259 * rcvbuf and sndbuf accordingly on all sockets 1259 * rcvbuf and sndbuf accordingly on all sockets
1260 */ 1260 */
1261 struct list_head *le; 1261 struct svc_sock *svsk;
1262 1262
1263 spin_lock_bh(&serv->sv_lock); 1263 spin_lock_bh(&serv->sv_lock);
1264 list_for_each(le, &serv->sv_permsocks) { 1264 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)
1265 struct svc_sock *svsk =
1266 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1265 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1268 } 1266 list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list)
1269 list_for_each(le, &serv->sv_tempsocks) {
1270 struct svc_sock *svsk =
1271 list_entry(le, struct svc_sock, sk_xprt.xpt_list);
1272 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1267 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1273 }
1274 spin_unlock_bh(&serv->sv_lock); 1268 spin_unlock_bh(&serv->sv_lock);
1275} 1269}
1276EXPORT_SYMBOL_GPL(svc_sock_update_bufs); 1270EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
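The conversion above trades list_for_each() plus a hand-rolled list_entry() for list_for_each_entry(), which folds the container lookup into the iterator. The same transformation in miniature, with an illustrative item type:

    struct item {
        int value;
        struct list_head link;
    };

    static void mark_all(struct list_head *head)
    {
        struct item *it;

        list_for_each_entry(it, head, link)
            it->value = 1;
    }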
@@ -1385,6 +1379,7 @@ EXPORT_SYMBOL_GPL(svc_addsock);
1385 */ 1379 */
1386static struct svc_xprt *svc_create_socket(struct svc_serv *serv, 1380static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1387 int protocol, 1381 int protocol,
1382 struct net *net,
1388 struct sockaddr *sin, int len, 1383 struct sockaddr *sin, int len,
1389 int flags) 1384 int flags)
1390{ 1385{
@@ -1421,7 +1416,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1421 return ERR_PTR(-EINVAL); 1416 return ERR_PTR(-EINVAL);
1422 } 1417 }
1423 1418
1424 error = sock_create_kern(family, type, protocol, &sock); 1419 error = __sock_create(net, family, type, protocol, &sock, 1);
1425 if (error < 0) 1420 if (error < 0)
1426 return ERR_PTR(error); 1421 return ERR_PTR(error);
1427 1422
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index a1f82a87d34d..cd9e841e7492 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -111,6 +111,23 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
111} 111}
112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace); 112EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
113 113
114/**
115 * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf
116 * @buf: XDR buffer where string resides
117 * @len: length of string, in bytes
118 *
119 */
120void
121xdr_terminate_string(struct xdr_buf *buf, const u32 len)
122{
123 char *kaddr;
124
125 kaddr = kmap_atomic(buf->pages[0], KM_USER0);
126 kaddr[buf->page_base + len] = '\0';
127 kunmap_atomic(kaddr, KM_USER0);
128}
129EXPORT_SYMBOL(xdr_terminate_string);
130
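Note the new helper's reach: it kmaps pages[0] only, so the terminator it writes at page_base + len must land inside the first page; callers with longer strings need another approach. A plausible caller, with the buffer and length names assumed:

    /* assumed caller: NUL-terminate a name decoded into page data */
    static int fixup_name(struct xdr_buf *rcvbuf, u32 len)
    {
        if (rcvbuf->page_base + len >= PAGE_SIZE)
            return -EIO; /* terminator would miss pages[0] */
        xdr_terminate_string(rcvbuf, len);
        return 0;
    }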
114void 131void
115xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, 132xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
116 unsigned int len) 133 unsigned int len)
@@ -395,24 +412,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
395{ 412{
396 struct kvec *tail; 413 struct kvec *tail;
397 size_t copy; 414 size_t copy;
398 char *p;
399 unsigned int pglen = buf->page_len; 415 unsigned int pglen = buf->page_len;
416 unsigned int tailbuf_len;
400 417
401 tail = buf->tail; 418 tail = buf->tail;
402 BUG_ON (len > pglen); 419 BUG_ON (len > pglen);
403 420
421 tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
422
404 /* Shift the tail first */ 423 /* Shift the tail first */
405 if (tail->iov_len != 0) { 424 if (tailbuf_len != 0) {
406 p = (char *)tail->iov_base + len; 425 unsigned int free_space = tailbuf_len - tail->iov_len;
426
427 if (len < free_space)
428 free_space = len;
429 tail->iov_len += free_space;
430
431 copy = len;
407 if (tail->iov_len > len) { 432 if (tail->iov_len > len) {
408 copy = tail->iov_len - len; 433 char *p = (char *)tail->iov_base + len;
409 memmove(p, tail->iov_base, copy); 434 memmove(p, tail->iov_base, tail->iov_len - len);
410 } else 435 } else
411 buf->buflen -= len;
412 /* Copy from the inlined pages into the tail */
413 copy = len;
414 if (copy > tail->iov_len)
415 copy = tail->iov_len; 436 copy = tail->iov_len;
437 /* Copy from the inlined pages into the tail */
416 _copy_from_pages((char *)tail->iov_base, 438 _copy_from_pages((char *)tail->iov_base,
417 buf->pages, buf->page_base + pglen - len, 439 buf->pages, buf->page_base + pglen - len,
418 copy); 440 copy);
@@ -551,6 +573,27 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
551EXPORT_SYMBOL_GPL(xdr_init_decode); 573EXPORT_SYMBOL_GPL(xdr_init_decode);
552 574
553/** 575/**
576 * xdr_inline_peek - Allow read-ahead in the XDR data stream
577 * @xdr: pointer to xdr_stream struct
578 * @nbytes: number of bytes of data to decode
579 *
580 * Check if the input buffer is long enough to enable us to decode
581 * 'nbytes' more bytes of data starting at the current position.
582 * If so return the current pointer without updating the current
583 * pointer position.
584 */
585__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
586{
587 __be32 *p = xdr->p;
588 __be32 *q = p + XDR_QUADLEN(nbytes);
589
590 if (unlikely(q > xdr->end || q < p))
591 return NULL;
592 return p;
593}
594EXPORT_SYMBOL_GPL(xdr_inline_peek);
595
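xdr_inline_peek() is xdr_inline_decode() minus the side effect: it bounds-checks nbytes against the inline buffer (including the q < p pointer-wrap case) but leaves xdr->p untouched. A usage sketch; this caller is an assumption, not code from the patch:

    /* look at a 4-byte discriminant without consuming it */
    static bool next_word_is(struct xdr_stream *xdr, u32 tag)
    {
        __be32 *p = xdr_inline_peek(xdr, 4);

        return p != NULL && ntohl(*p) == tag;
    }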
596/**
554 * xdr_inline_decode - Retrieve non-page XDR data to decode 597 * xdr_inline_decode - Retrieve non-page XDR data to decode
555 * @xdr: pointer to xdr_stream struct 598 * @xdr: pointer to xdr_stream struct
556 * @nbytes: number of bytes of data to decode 599 * @nbytes: number of bytes of data to decode
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 970fb00f388c..4c8f18aff7c3 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -199,8 +199,6 @@ int xprt_reserve_xprt(struct rpc_task *task)
199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
200 if (task == xprt->snd_task) 200 if (task == xprt->snd_task)
201 return 1; 201 return 1;
202 if (task == NULL)
203 return 0;
204 goto out_sleep; 202 goto out_sleep;
205 } 203 }
206 xprt->snd_task = task; 204 xprt->snd_task = task;
@@ -757,13 +755,11 @@ static void xprt_connect_status(struct rpc_task *task)
757 */ 755 */
758struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) 756struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
759{ 757{
760 struct list_head *pos; 758 struct rpc_rqst *entry;
761 759
762 list_for_each(pos, &xprt->recv) { 760 list_for_each_entry(entry, &xprt->recv, rq_list)
763 struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
764 if (entry->rq_xid == xid) 761 if (entry->rq_xid == xid)
765 return entry; 762 return entry;
766 }
767 763
768 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n", 764 dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
769 ntohl(xid)); 765 ntohl(xid));
@@ -962,6 +958,37 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
962 spin_unlock(&xprt->reserve_lock); 958 spin_unlock(&xprt->reserve_lock);
963} 959}
964 960
961struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
962{
963 struct rpc_xprt *xprt;
964
965 xprt = kzalloc(size, GFP_KERNEL);
966 if (xprt == NULL)
967 goto out;
968
969 xprt->max_reqs = max_req;
970 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
971 if (xprt->slot == NULL)
972 goto out_free;
973
974 xprt->xprt_net = get_net(net);
975 return xprt;
976
977out_free:
978 kfree(xprt);
979out:
980 return NULL;
981}
982EXPORT_SYMBOL_GPL(xprt_alloc);
983
984void xprt_free(struct rpc_xprt *xprt)
985{
986 put_net(xprt->xprt_net);
987 kfree(xprt->slot);
988 kfree(xprt);
989}
990EXPORT_SYMBOL_GPL(xprt_free);
991
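xprt_alloc()/xprt_free() centralize the xprt, slot-table, and netns-reference boilerplate that every transport used to open-code; the transport.c and xprtsock.c hunks below convert to them. A hedged sketch of the new caller shape, where struct my_xprt and the slot count are illustrative only:

    static struct rpc_xprt *my_setup(struct xprt_create *args)
    {
        struct rpc_xprt *xprt;

        /* allocates the xprt and slot table, takes a reference on args->net */
        xprt = xprt_alloc(args->net, sizeof(struct my_xprt), 16);
        if (xprt == NULL)
            return ERR_PTR(-ENOMEM);
        /* transport-specific setup; on failure, undo with xprt_free(xprt) */
        return xprt;
    }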
965/** 992/**
966 * xprt_reserve - allocate an RPC request slot 993 * xprt_reserve - allocate an RPC request slot
967 * @task: RPC task requesting a slot allocation 994 * @task: RPC task requesting a slot allocation
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index d718b8fa9525..09af4fab1a45 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/fs.h> 44#include <linux/fs.h>
45#include <linux/sysctl.h> 45#include <linux/sysctl.h>
46#include <linux/workqueue.h>
46#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/sched.h> 48#include <linux/sunrpc/sched.h>
48#include <linux/sunrpc/svc_rdma.h> 49#include <linux/sunrpc/svc_rdma.h>
@@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod;
74struct kmem_cache *svc_rdma_map_cachep; 75struct kmem_cache *svc_rdma_map_cachep;
75struct kmem_cache *svc_rdma_ctxt_cachep; 76struct kmem_cache *svc_rdma_ctxt_cachep;
76 77
78struct workqueue_struct *svc_rdma_wq;
79
77/* 80/*
78 * This function implements reading and resetting an atomic_t stat 81 * This function implements reading and resetting an atomic_t stat
79 * variable through read/write to a proc file. Any write to the file 82 * variable through read/write to a proc file. Any write to the file
@@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = {
231void svc_rdma_cleanup(void) 234void svc_rdma_cleanup(void)
232{ 235{
233 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 236 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
234 flush_scheduled_work(); 237 destroy_workqueue(svc_rdma_wq);
235 if (svcrdma_table_header) { 238 if (svcrdma_table_header) {
236 unregister_sysctl_table(svcrdma_table_header); 239 unregister_sysctl_table(svcrdma_table_header);
237 svcrdma_table_header = NULL; 240 svcrdma_table_header = NULL;
@@ -249,6 +252,11 @@ int svc_rdma_init(void)
249 dprintk("\tsq_depth : %d\n", 252 dprintk("\tsq_depth : %d\n",
250 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); 253 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
251 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 254 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
255
256 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
257 if (!svc_rdma_wq)
258 return -ENOMEM;
259
252 if (!svcrdma_table_header) 260 if (!svcrdma_table_header)
253 svcrdma_table_header = 261 svcrdma_table_header =
254 register_sysctl_table(svcrdma_root_table); 262 register_sysctl_table(svcrdma_root_table);
@@ -283,6 +291,7 @@ int svc_rdma_init(void)
283 kmem_cache_destroy(svc_rdma_map_cachep); 291 kmem_cache_destroy(svc_rdma_map_cachep);
284 err0: 292 err0:
285 unregister_sysctl_table(svcrdma_table_header); 293 unregister_sysctl_table(svcrdma_table_header);
294 destroy_workqueue(svc_rdma_wq);
286 return -ENOMEM; 295 return -ENOMEM;
287} 296}
288MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
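svcrdma now owns a dedicated workqueue instead of leaning on the shared kernel-global one, so teardown can use destroy_workqueue(), which drains pending work, in place of flush_scheduled_work(). The lifecycle as used in this diff, condensed:

    static int my_init(void)
    {
        svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
        return svc_rdma_wq ? 0 : -ENOMEM;
    }

    static void my_exit(void)
    {
        /* waits for queued sc_work items, then frees the queue */
        destroy_workqueue(svc_rdma_wq);
    }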
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de814933..df67211c4baf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; 263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) { 264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] = 265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device, 266 ib_dma_map_page(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]), 267 rqstp->rq_arg.pages[page_no], 0,
268 PAGE_SIZE, DMA_FROM_DEVICE); 268 PAGE_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no])) 270 frmr->page_list->page_list[page_no]))
271 goto fatal_err; 271 goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
309 int count) 309 int count)
310{ 310{
311 int i; 311 int i;
312 unsigned long off;
312 313
313 ctxt->count = count; 314 ctxt->count = count;
314 ctxt->direction = DMA_FROM_DEVICE; 315 ctxt->direction = DMA_FROM_DEVICE;
315 for (i = 0; i < count; i++) { 316 for (i = 0; i < count; i++) {
316 ctxt->sge[i].length = 0; /* in case map fails */ 317 ctxt->sge[i].length = 0; /* in case map fails */
317 if (!frmr) { 318 if (!frmr) {
319 BUG_ON(0 == virt_to_page(vec[i].iov_base));
320 off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
318 ctxt->sge[i].addr = 321 ctxt->sge[i].addr =
319 ib_dma_map_single(xprt->sc_cm_id->device, 322 ib_dma_map_page(xprt->sc_cm_id->device,
320 vec[i].iov_base, 323 virt_to_page(vec[i].iov_base),
321 vec[i].iov_len, 324 off,
322 DMA_FROM_DEVICE); 325 vec[i].iov_len,
326 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 327 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr)) 328 ctxt->sge[i].addr))
325 return -EINVAL; 329 return -EINVAL;
@@ -491,6 +495,7 @@ next_sge:
491 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", 495 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
492 err); 496 err);
493 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 497 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
498 svc_rdma_unmap_dma(ctxt);
494 svc_rdma_put_context(ctxt, 0); 499 svc_rdma_put_context(ctxt, 0);
495 goto out; 500 goto out;
496 } 501 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1ebb2bfa..249a835b703f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
 70 * one extra page for the RPCRDMA header. 70 * one extra page for the RPCRDMA header.
71 */ 71 */
72static int fast_reg_xdr(struct svcxprt_rdma *xprt, 72static int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{ 75{
76 int sge_no; 76 int sge_no;
77 u32 sge_bytes; 77 u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
96 vec->count = 2; 96 vec->count = 2;
97 sge_no++; 97 sge_no++;
98 98
99 /* Build the FRMR */ 99 /* Map the XDR head */
100 frmr->kva = frva; 100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE; 101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0; 102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE; 103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1; 104 frmr->page_list_len = 1;
105 page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
105 frmr->page_list->page_list[page_no] = 106 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device, 107 ib_dma_map_page(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base, 108 virt_to_page(xdr->head[0].iov_base),
108 PAGE_SIZE, DMA_TO_DEVICE); 109 page_off,
110 PAGE_SIZE - page_off,
111 DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 112 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no])) 113 frmr->page_list->page_list[page_no]))
111 goto fatal_err; 114 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used); 115 atomic_inc(&xprt->sc_dma_used);
113 116
117 /* Map the XDR page list */
114 page_off = xdr->page_base; 118 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off; 119 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes) 120 if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 132 page_bytes -= sge_bytes;
129 133
130 frmr->page_list->page_list[page_no] = 134 frmr->page_list->page_list[page_no] =
131 ib_dma_map_single(xprt->sc_cm_id->device, 135 ib_dma_map_page(xprt->sc_cm_id->device,
132 page_address(page), 136 page, page_off,
133 PAGE_SIZE, DMA_TO_DEVICE); 137 sge_bytes, DMA_TO_DEVICE);
134 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 138 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
135 frmr->page_list->page_list[page_no])) 139 frmr->page_list->page_list[page_no]))
136 goto fatal_err; 140 goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
166 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; 170 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
167 171
168 frmr->page_list->page_list[page_no] = 172 frmr->page_list->page_list[page_no] =
169 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, 173 ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
170 DMA_TO_DEVICE); 174 page_off,
175 PAGE_SIZE,
176 DMA_TO_DEVICE);
171 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 177 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
172 frmr->page_list->page_list[page_no])) 178 frmr->page_list->page_list[page_no]))
173 goto fatal_err; 179 goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
245 return 0; 251 return 0;
246} 252}
247 253
254static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
255 struct xdr_buf *xdr,
256 u32 xdr_off, size_t len, int dir)
257{
258 struct page *page;
259 dma_addr_t dma_addr;
260 if (xdr_off < xdr->head[0].iov_len) {
261 /* This offset is in the head */
262 xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
263 page = virt_to_page(xdr->head[0].iov_base);
264 } else {
265 xdr_off -= xdr->head[0].iov_len;
266 if (xdr_off < xdr->page_len) {
267 /* This offset is in the page list */
268 page = xdr->pages[xdr_off >> PAGE_SHIFT];
269 xdr_off &= ~PAGE_MASK;
270 } else {
271 /* This offset is in the tail */
272 xdr_off -= xdr->page_len;
273 xdr_off += (unsigned long)
274 xdr->tail[0].iov_base & ~PAGE_MASK;
275 page = virt_to_page(xdr->tail[0].iov_base);
276 }
277 }
278 dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
279 min_t(size_t, PAGE_SIZE, len), dir);
280 return dma_addr;
281}
282
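dma_map_xdr() resolves an offset into the three regions of an xdr_buf in order: head kvec, page array, tail kvec. A worked example of the arithmetic, with region sizes assumed for illustration:

    /* assume head.iov_len = 100 and page_len = 8192 (two full pages), 4K pages */
    /* xdr_off =   40 -> head:  virt_to_page(head.iov_base), plus head's in-page offset */
    /* xdr_off = 4196 -> pages: 4196 - 100 = 4096 -> pages[4096 >> PAGE_SHIFT] = pages[1], offset 0 */
    /* xdr_off = 8292 -> tail:  8292 - 100 - 8192 = 0 bytes in, plus tail's in-page offset */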
248/* Assumptions: 283/* Assumptions:
249 * - We are using FRMR 284 * - We are using FRMR
250 * - or - 285 * - or -
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
293 sge[sge_no].length = sge_bytes; 328 sge[sge_no].length = sge_bytes;
294 if (!vec->frmr) { 329 if (!vec->frmr) {
295 sge[sge_no].addr = 330 sge[sge_no].addr =
296 ib_dma_map_single(xprt->sc_cm_id->device, 331 dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
297 (void *) 332 sge_bytes, DMA_TO_DEVICE);
298 vec->sge[xdr_sge_no].iov_base + sge_off, 333 xdr_off += sge_bytes;
299 sge_bytes, DMA_TO_DEVICE);
300 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 334 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
301 sge[sge_no].addr)) 335 sge[sge_no].addr))
302 goto err; 336 goto err;
@@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
333 goto err; 367 goto err;
334 return 0; 368 return 0;
335 err: 369 err:
370 svc_rdma_unmap_dma(ctxt);
371 svc_rdma_put_frmr(xprt, vec->frmr);
336 svc_rdma_put_context(ctxt, 0); 372 svc_rdma_put_context(ctxt, 0);
337 /* Fatal error, close transport */ 373 /* Fatal error, close transport */
338 return -EIO; 374 return -EIO;
@@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
494 * In all three cases, this function prepares the RPCRDMA header in 530 * In all three cases, this function prepares the RPCRDMA header in
495 * sge[0], the 'type' parameter indicates the type to place in the 531 * sge[0], the 'type' parameter indicates the type to place in the
496 * RPCRDMA header, and the 'byte_count' field indicates how much of 532 * RPCRDMA header, and the 'byte_count' field indicates how much of
497 * the XDR to include in this RDMA_SEND. 533 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
534 * to send is zero in the XDR.
498 */ 535 */
499static int send_reply(struct svcxprt_rdma *rdma, 536static int send_reply(struct svcxprt_rdma *rdma,
500 struct svc_rqst *rqstp, 537 struct svc_rqst *rqstp,
@@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey; 573 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 574 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
538 ctxt->sge[0].addr = 575 ctxt->sge[0].addr =
539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), 576 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
540 ctxt->sge[0].length, DMA_TO_DEVICE); 577 ctxt->sge[0].length, DMA_TO_DEVICE);
541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 578 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
542 goto err; 579 goto err;
543 atomic_inc(&rdma->sc_dma_used); 580 atomic_inc(&rdma->sc_dma_used);
544 581
545 ctxt->direction = DMA_TO_DEVICE; 582 ctxt->direction = DMA_TO_DEVICE;
546 583
547 /* Determine how many of our SGE are to be transmitted */ 584 /* Map the payload indicated by 'byte_count' */
548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 585 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
586 int xdr_off = 0;
549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 587 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
550 byte_count -= sge_bytes; 588 byte_count -= sge_bytes;
551 if (!vec->frmr) { 589 if (!vec->frmr) {
552 ctxt->sge[sge_no].addr = 590 ctxt->sge[sge_no].addr =
553 ib_dma_map_single(rdma->sc_cm_id->device, 591 dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
554 vec->sge[sge_no].iov_base, 592 sge_bytes, DMA_TO_DEVICE);
555 sge_bytes, DMA_TO_DEVICE); 593 xdr_off += sge_bytes;
556 if (ib_dma_mapping_error(rdma->sc_cm_id->device, 594 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
557 ctxt->sge[sge_no].addr)) 595 ctxt->sge[sge_no].addr))
558 goto err; 596 goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a54e51..9df1eadc912a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -45,6 +45,7 @@
45#include <linux/sched.h> 45#include <linux/sched.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/spinlock.h> 47#include <linux/spinlock.h>
48#include <linux/workqueue.h>
48#include <rdma/ib_verbs.h> 49#include <rdma/ib_verbs.h>
49#include <rdma/rdma_cm.h> 50#include <rdma/rdma_cm.h>
50#include <linux/sunrpc/svc_rdma.h> 51#include <linux/sunrpc/svc_rdma.h>
@@ -52,6 +53,7 @@
52#define RPCDBG_FACILITY RPCDBG_SVCXPRT 53#define RPCDBG_FACILITY RPCDBG_SVCXPRT
53 54
54static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 55static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
56 struct net *net,
55 struct sockaddr *sa, int salen, 57 struct sockaddr *sa, int salen,
56 int flags); 58 int flags);
57static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 59static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
@@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = {
89/* WR context cache. Created in svc_rdma.c */ 91/* WR context cache. Created in svc_rdma.c */
90extern struct kmem_cache *svc_rdma_ctxt_cachep; 92extern struct kmem_cache *svc_rdma_ctxt_cachep;
91 93
94/* Workqueue created in svc_rdma.c */
95extern struct workqueue_struct *svc_rdma_wq;
96
92struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 97struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
93{ 98{
94 struct svc_rdma_op_ctxt *ctxt; 99 struct svc_rdma_op_ctxt *ctxt;
@@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
120 */ 125 */
121 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { 126 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
122 atomic_dec(&xprt->sc_dma_used); 127 atomic_dec(&xprt->sc_dma_used);
123 ib_dma_unmap_single(xprt->sc_cm_id->device, 128 ib_dma_unmap_page(xprt->sc_cm_id->device,
124 ctxt->sge[i].addr, 129 ctxt->sge[i].addr,
125 ctxt->sge[i].length, 130 ctxt->sge[i].length,
126 ctxt->direction); 131 ctxt->direction);
@@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
502 BUG_ON(sge_no >= xprt->sc_max_sge); 507 BUG_ON(sge_no >= xprt->sc_max_sge);
503 page = svc_rdma_get_page(); 508 page = svc_rdma_get_page();
504 ctxt->pages[sge_no] = page; 509 ctxt->pages[sge_no] = page;
505 pa = ib_dma_map_single(xprt->sc_cm_id->device, 510 pa = ib_dma_map_page(xprt->sc_cm_id->device,
506 page_address(page), PAGE_SIZE, 511 page, 0, PAGE_SIZE,
507 DMA_FROM_DEVICE); 512 DMA_FROM_DEVICE);
508 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 513 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
509 goto err_put_ctxt; 514 goto err_put_ctxt;
@@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
511 ctxt->sge[sge_no].addr = pa; 516 ctxt->sge[sge_no].addr = pa;
512 ctxt->sge[sge_no].length = PAGE_SIZE; 517 ctxt->sge[sge_no].length = PAGE_SIZE;
513 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; 518 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
519 ctxt->count = sge_no + 1;
514 buflen += PAGE_SIZE; 520 buflen += PAGE_SIZE;
515 } 521 }
516 ctxt->count = sge_no;
517 recv_wr.next = NULL; 522 recv_wr.next = NULL;
518 recv_wr.sg_list = &ctxt->sge[0]; 523 recv_wr.sg_list = &ctxt->sge[0];
519 recv_wr.num_sge = ctxt->count; 524 recv_wr.num_sge = ctxt->count;
@@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
529 return ret; 534 return ret;
530 535
531 err_put_ctxt: 536 err_put_ctxt:
537 svc_rdma_unmap_dma(ctxt);
532 svc_rdma_put_context(ctxt, 1); 538 svc_rdma_put_context(ctxt, 1);
533 return -ENOMEM; 539 return -ENOMEM;
534} 540}
@@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
670 * Create a listening RDMA service endpoint. 676 * Create a listening RDMA service endpoint.
671 */ 677 */
672static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 678static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
679 struct net *net,
673 struct sockaddr *sa, int salen, 680 struct sockaddr *sa, int salen,
674 int flags) 681 int flags)
675{ 682{
@@ -798,8 +805,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
798 if (ib_dma_mapping_error(frmr->mr->device, addr)) 805 if (ib_dma_mapping_error(frmr->mr->device, addr))
799 continue; 806 continue;
800 atomic_dec(&xprt->sc_dma_used); 807 atomic_dec(&xprt->sc_dma_used);
801 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, 808 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
802 frmr->direction); 809 frmr->direction);
803 } 810 }
804} 811}
805 812
@@ -1184,7 +1191,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
1184 struct svcxprt_rdma *rdma = 1191 struct svcxprt_rdma *rdma =
1185 container_of(xprt, struct svcxprt_rdma, sc_xprt); 1192 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1186 INIT_WORK(&rdma->sc_work, __svc_rdma_free); 1193 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1187 schedule_work(&rdma->sc_work); 1194 queue_work(svc_rdma_wq, &rdma->sc_work);
1188} 1195}
1189 1196
1190static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1197static int svc_rdma_has_wspace(struct svc_xprt *xprt)
@@ -1274,7 +1281,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1274 atomic_read(&xprt->sc_sq_count) < 1281 atomic_read(&xprt->sc_sq_count) <
1275 xprt->sc_sq_depth); 1282 xprt->sc_sq_depth);
1276 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1283 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1277 return 0; 1284 return -ENOTCONN;
1278 continue; 1285 continue;
1279 } 1286 }
1280 /* Take a transport ref for each WR posted */ 1287 /* Take a transport ref for each WR posted */
@@ -1306,7 +1313,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1306 enum rpcrdma_errcode err) 1313 enum rpcrdma_errcode err)
1307{ 1314{
1308 struct ib_send_wr err_wr; 1315 struct ib_send_wr err_wr;
1309 struct ib_sge sge;
1310 struct page *p; 1316 struct page *p;
1311 struct svc_rdma_op_ctxt *ctxt; 1317 struct svc_rdma_op_ctxt *ctxt;
1312 u32 *va; 1318 u32 *va;
@@ -1319,26 +1325,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1319 /* XDR encode error */ 1325 /* XDR encode error */
1320 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1326 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1321 1327
1328 ctxt = svc_rdma_get_context(xprt);
1329 ctxt->direction = DMA_FROM_DEVICE;
1330 ctxt->count = 1;
1331 ctxt->pages[0] = p;
1332
1322 /* Prepare SGE for local address */ 1333 /* Prepare SGE for local address */
1323 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, 1334 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1324 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1325 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1326 put_page(p); 1337 put_page(p);
1327 return; 1338 return;
1328 } 1339 }
1329 atomic_inc(&xprt->sc_dma_used); 1340 atomic_inc(&xprt->sc_dma_used);
1330 sge.lkey = xprt->sc_dma_lkey; 1341 ctxt->sge[0].lkey = xprt->sc_dma_lkey;
1331 sge.length = length; 1342 ctxt->sge[0].length = length;
1332
1333 ctxt = svc_rdma_get_context(xprt);
1334 ctxt->count = 1;
1335 ctxt->pages[0] = p;
1336 1343
1337 /* Prepare SEND WR */ 1344 /* Prepare SEND WR */
1338 memset(&err_wr, 0, sizeof err_wr); 1345 memset(&err_wr, 0, sizeof err_wr);
1339 ctxt->wr_op = IB_WR_SEND; 1346 ctxt->wr_op = IB_WR_SEND;
1340 err_wr.wr_id = (unsigned long)ctxt; 1347 err_wr.wr_id = (unsigned long)ctxt;
1341 err_wr.sg_list = &sge; 1348 err_wr.sg_list = ctxt->sge;
1342 err_wr.num_sge = 1; 1349 err_wr.num_sge = 1;
1343 err_wr.opcode = IB_WR_SEND; 1350 err_wr.opcode = IB_WR_SEND;
1344 err_wr.send_flags = IB_SEND_SIGNALED; 1351 err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1355,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1348 if (ret) { 1355 if (ret) {
1349 dprintk("svcrdma: Error %d posting send for protocol error\n", 1356 dprintk("svcrdma: Error %d posting send for protocol error\n",
1350 ret); 1357 ret);
1351 ib_dma_unmap_single(xprt->sc_cm_id->device, 1358 svc_rdma_unmap_dma(ctxt);
1352 sge.addr, PAGE_SIZE,
1353 DMA_FROM_DEVICE);
1354 svc_rdma_put_context(ctxt, 1); 1359 svc_rdma_put_context(ctxt, 1);
1355 } 1360 }
1356} 1361}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a85e866a77f7..0867070bb5ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
237 237
238 dprintk("RPC: %s: called\n", __func__); 238 dprintk("RPC: %s: called\n", __func__);
239 239
240 cancel_delayed_work(&r_xprt->rdma_connect); 240 cancel_delayed_work_sync(&r_xprt->rdma_connect);
241 flush_scheduled_work();
242 241
243 xprt_clear_connected(xprt); 242 xprt_clear_connected(xprt);
244 243
@@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
251 250
252 xprt_rdma_free_addresses(xprt); 251 xprt_rdma_free_addresses(xprt);
253 252
254 kfree(xprt->slot); 253 xprt_free(xprt);
255 xprt->slot = NULL;
256 kfree(xprt);
257 254
258 dprintk("RPC: %s: returning\n", __func__); 255 dprintk("RPC: %s: returning\n", __func__);
259 256
@@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args)
285 return ERR_PTR(-EBADF); 282 return ERR_PTR(-EBADF);
286 } 283 }
287 284
288 xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); 285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
286 xprt_rdma_slot_table_entries);
289 if (xprt == NULL) { 287 if (xprt == NULL) {
290 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 288 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
291 __func__); 289 __func__);
292 return ERR_PTR(-ENOMEM); 290 return ERR_PTR(-ENOMEM);
293 } 291 }
294 292
295 xprt->max_reqs = xprt_rdma_slot_table_entries;
296 xprt->slot = kcalloc(xprt->max_reqs,
297 sizeof(struct rpc_rqst), GFP_KERNEL);
298 if (xprt->slot == NULL) {
299 dprintk("RPC: %s: couldn't allocate %d slots\n",
300 __func__, xprt->max_reqs);
301 kfree(xprt);
302 return ERR_PTR(-ENOMEM);
303 }
304
305 /* 60 second timeout, no retries */ 293 /* 60 second timeout, no retries */
306 xprt->timeout = &xprt_rdma_default_timeout; 294 xprt->timeout = &xprt_rdma_default_timeout;
307 xprt->bind_timeout = (60U * HZ); 295 xprt->bind_timeout = (60U * HZ);
@@ -410,8 +398,7 @@ out3:
410out2: 398out2:
411 rpcrdma_ia_close(&new_xprt->rx_ia); 399 rpcrdma_ia_close(&new_xprt->rx_ia);
412out1: 400out1:
413 kfree(xprt->slot); 401 xprt_free(xprt);
414 kfree(xprt);
415 return ERR_PTR(rc); 402 return ERR_PTR(rc);
416} 403}
417 404
@@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task)
460 } else { 447 } else {
461 schedule_delayed_work(&r_xprt->rdma_connect, 0); 448 schedule_delayed_work(&r_xprt->rdma_connect, 0);
462 if (!RPC_IS_ASYNC(task)) 449 if (!RPC_IS_ASYNC(task))
463 flush_scheduled_work(); 450 flush_delayed_work(&r_xprt->rdma_connect);
464 } 451 }
465} 452}
466 453
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fe9306bf10cc..dfcab5ac65af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -774,8 +774,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
774 774
775 xs_close(xprt); 775 xs_close(xprt);
776 xs_free_peer_addresses(xprt); 776 xs_free_peer_addresses(xprt);
777 kfree(xprt->slot); 777 xprt_free(xprt);
778 kfree(xprt);
779 module_put(THIS_MODULE); 778 module_put(THIS_MODULE);
780} 779}
781 780
@@ -1516,7 +1515,7 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
1516 xs_update_peer_port(xprt); 1515 xs_update_peer_port(xprt);
1517} 1516}
1518 1517
1519static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) 1518static unsigned short xs_get_srcport(struct sock_xprt *transport)
1520{ 1519{
1521 unsigned short port = transport->srcport; 1520 unsigned short port = transport->srcport;
1522 1521
@@ -1525,7 +1524,7 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket
1525 return port; 1524 return port;
1526} 1525}
1527 1526
1528static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) 1527static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
1529{ 1528{
1530 if (transport->srcport != 0) 1529 if (transport->srcport != 0)
1531 transport->srcport = 0; 1530 transport->srcport = 0;
@@ -1535,23 +1534,18 @@ static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket
1535 return xprt_max_resvport; 1534 return xprt_max_resvport;
1536 return --port; 1535 return --port;
1537} 1536}
1538 1537static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1539static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1540{ 1538{
1541 struct sockaddr_in myaddr = { 1539 struct sockaddr_storage myaddr;
1542 .sin_family = AF_INET,
1543 };
1544 struct sockaddr_in *sa;
1545 int err, nloop = 0; 1540 int err, nloop = 0;
1546 unsigned short port = xs_get_srcport(transport, sock); 1541 unsigned short port = xs_get_srcport(transport);
1547 unsigned short last; 1542 unsigned short last;
1548 1543
1549 sa = (struct sockaddr_in *)&transport->srcaddr; 1544 memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
1550 myaddr.sin_addr = sa->sin_addr;
1551 do { 1545 do {
1552 myaddr.sin_port = htons(port); 1546 rpc_set_port((struct sockaddr *)&myaddr, port);
1553 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1547 err = kernel_bind(sock, (struct sockaddr *)&myaddr,
1554 sizeof(myaddr)); 1548 transport->xprt.addrlen);
1555 if (port == 0) 1549 if (port == 0)
1556 break; 1550 break;
1557 if (err == 0) { 1551 if (err == 0) {
@@ -1559,48 +1553,23 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1559 break; 1553 break;
1560 } 1554 }
1561 last = port; 1555 last = port;
1562 port = xs_next_srcport(transport, sock, port); 1556 port = xs_next_srcport(transport, port);
1563 if (port > last) 1557 if (port > last)
1564 nloop++; 1558 nloop++;
1565 } while (err == -EADDRINUSE && nloop != 2); 1559 } while (err == -EADDRINUSE && nloop != 2);
1566 dprintk("RPC: %s %pI4:%u: %s (%d)\n",
1567 __func__, &myaddr.sin_addr,
1568 port, err ? "failed" : "ok", err);
1569 return err;
1570}
1571
1572static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
1573{
1574 struct sockaddr_in6 myaddr = {
1575 .sin6_family = AF_INET6,
1576 };
1577 struct sockaddr_in6 *sa;
1578 int err, nloop = 0;
1579 unsigned short port = xs_get_srcport(transport, sock);
1580 unsigned short last;
1581 1560
1582 sa = (struct sockaddr_in6 *)&transport->srcaddr; 1561 if (myaddr.ss_family == AF_INET)
1583 myaddr.sin6_addr = sa->sin6_addr; 1562 dprintk("RPC: %s %pI4:%u: %s (%d)\n", __func__,
1584 do { 1563 &((struct sockaddr_in *)&myaddr)->sin_addr,
1585 myaddr.sin6_port = htons(port); 1564 port, err ? "failed" : "ok", err);
1586 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1565 else
1587 sizeof(myaddr)); 1566 dprintk("RPC: %s %pI6:%u: %s (%d)\n", __func__,
1588 if (port == 0) 1567 &((struct sockaddr_in6 *)&myaddr)->sin6_addr,
1589 break; 1568 port, err ? "failed" : "ok", err);
1590 if (err == 0) {
1591 transport->srcport = port;
1592 break;
1593 }
1594 last = port;
1595 port = xs_next_srcport(transport, sock, port);
1596 if (port > last)
1597 nloop++;
1598 } while (err == -EADDRINUSE && nloop != 2);
1599 dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n",
1600 &myaddr.sin6_addr, port, err ? "failed" : "ok", err);
1601 return err; 1569 return err;
1602} 1570}
1603 1571
1572
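xs_bind() now operates on a sockaddr_storage copied from the transport, so one bind loop serves both address families and rpc_set_port() hides the only family-specific step. What that helper is assumed to do:

    /* family-dispatching port store, matching the assumed rpc_set_port() */
    static void set_port(struct sockaddr *sap, unsigned short port)
    {
        switch (sap->sa_family) {
        case AF_INET:
            ((struct sockaddr_in *)sap)->sin_port = htons(port);
            break;
        case AF_INET6:
            ((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
            break;
        }
    }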
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key xs_key[2];
 static struct lock_class_key xs_slock_key[2];
@@ -1622,6 +1591,18 @@ static inline void xs_reclassify_socket6(struct socket *sock)
 	sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
 		&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
 }
+
+static inline void xs_reclassify_socket(int family, struct socket *sock)
+{
+	switch (family) {
+	case AF_INET:
+		xs_reclassify_socket4(sock);
+		break;
+	case AF_INET6:
+		xs_reclassify_socket6(sock);
+		break;
+	}
+}
 #else
 static inline void xs_reclassify_socket4(struct socket *sock)
 {
@@ -1630,8 +1611,36 @@ static inline void xs_reclassify_socket4(struct socket *sock)
 static inline void xs_reclassify_socket6(struct socket *sock)
 {
 }
+
+static inline void xs_reclassify_socket(int family, struct socket *sock)
+{
+}
 #endif
 
+static struct socket *xs_create_sock(struct rpc_xprt *xprt,
+		struct sock_xprt *transport, int family, int type, int protocol)
+{
+	struct socket *sock;
+	int err;
+
+	err = __sock_create(xprt->xprt_net, family, type, protocol, &sock, 1);
+	if (err < 0) {
+		dprintk("RPC: can't create %d transport socket (%d).\n",
+				protocol, -err);
+		goto out;
+	}
+	xs_reclassify_socket(family, sock);
+
+	if (xs_bind(transport, sock)) {
+		sock_release(sock);
+		goto out;
+	}
+
+	return sock;
+out:
+	return ERR_PTR(err);
+}
+
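xs_create_sock() reports failure through the kernel's error-pointer convention: a small negative errno is encoded in the returned pointer itself, so callers test IS_ERR() instead of checking a separate status variable. A toy re-implementation of that convention, as a sketch rather than the real <linux/err.h>:

#include <stdio.h>
#include <stdint.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)      /* encode -errno as a pointer */
{
        return (void *)error;
}

static inline long PTR_ERR(const void *ptr)  /* decode it back */
{
        return (long)ptr;
}

static inline int IS_ERR(const void *ptr)    /* top 4095 addresses = errors */
{
        return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

int main(void)
{
        void *sock = ERR_PTR(-12 /* -ENOMEM */);

        if (IS_ERR(sock))
                printf("creation failed: %ld\n", PTR_ERR(sock));
        return 0;
}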
 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1661,82 +1670,23 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	xs_udp_do_set_buffer_size(xprt);
 }
 
-/**
- * xs_udp_connect_worker4 - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker4(struct work_struct *work)
+static void xs_udp_setup_socket(struct work_struct *work)
 {
 	struct sock_xprt *transport =
 		container_of(work, struct sock_xprt, connect_worker.work);
 	struct rpc_xprt *xprt = &transport->xprt;
 	struct socket *sock = transport->sock;
-	int err, status = -EIO;
+	int status = -EIO;
 
 	if (xprt->shutdown)
 		goto out;
 
 	/* Start by resetting any existing state */
 	xs_reset_transport(transport);
-
-	err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
+	sock = xs_create_sock(xprt, transport,
+			xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
+	if (IS_ERR(sock))
 		goto out;
-	}
-	xs_reclassify_socket4(sock);
-
-	if (xs_bind4(transport, sock)) {
-		sock_release(sock);
-		goto out;
-	}
-
-	dprintk("RPC: worker connecting xprt %p via %s to "
-			"%s (port %s)\n", xprt,
-			xprt->address_strings[RPC_DISPLAY_PROTO],
-			xprt->address_strings[RPC_DISPLAY_ADDR],
-			xprt->address_strings[RPC_DISPLAY_PORT]);
-
-	xs_udp_finish_connecting(xprt, sock);
-	status = 0;
-out:
-	xprt_clear_connecting(xprt);
-	xprt_wake_pending_tasks(xprt, status);
-}
-
-/**
- * xs_udp_connect_worker6 - set up a UDP socket
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_udp_connect_worker6(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-	struct socket *sock = transport->sock;
-	int err, status = -EIO;
-
-	if (xprt->shutdown)
-		goto out;
-
-	/* Start by resetting any existing state */
-	xs_reset_transport(transport);
-
-	err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
-		goto out;
-	}
-	xs_reclassify_socket6(sock);
-
-	if (xs_bind6(transport, sock) < 0) {
-		sock_release(sock);
-		goto out;
-	}
 
 	dprintk("RPC: worker connecting xprt %p via %s to "
 			"%s (port %s)\n", xprt,
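The single setup callback can serve both socket families because the work queue hands back only a pointer to the embedded work_struct, and container_of() recovers the enclosing transport. A self-contained sketch of that recovery step, with illustrative type names:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };

struct sock_xprt_like {
        int srcport;
        struct work_struct connect_worker;   /* embedded, not a pointer */
};

static void worker(struct work_struct *work)
{
        struct sock_xprt_like *transport =
                container_of(work, struct sock_xprt_like, connect_worker);

        printf("srcport = %d\n", transport->srcport);
}

int main(void)
{
        struct sock_xprt_like t = { .srcport = 800 };

        worker(&t.connect_worker);   /* only the member pointer is passed */
        return 0;
}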
@@ -1755,12 +1705,12 @@ out:
  * We need to preserve the port number so the reply cache on the server can
  * find our cached RPC replies when we get around to reconnecting.
  */
-static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+static void xs_abort_connection(struct sock_xprt *transport)
 {
 	int result;
 	struct sockaddr any;
 
-	dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
+	dprintk("RPC: disconnecting xprt %p to reuse port\n", transport);
 
 	/*
 	 * Disconnect the transport socket by doing a connect operation
@@ -1770,13 +1720,13 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo
 	any.sa_family = AF_UNSPEC;
 	result = kernel_connect(transport->sock, &any, sizeof(any), 0);
 	if (!result)
-		xs_sock_mark_closed(xprt);
+		xs_sock_mark_closed(&transport->xprt);
 	else
 		dprintk("RPC: AF_UNSPEC connect return code %d\n",
 				result);
 }
 
-static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+static void xs_tcp_reuse_connection(struct sock_xprt *transport)
 {
 	unsigned int state = transport->inet->sk_state;
 
@@ -1799,7 +1749,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra
 			"sk_shutdown set to %d\n",
 			__func__, transport->inet->sk_shutdown);
 	}
-	xs_abort_connection(xprt, transport);
+	xs_abort_connection(transport);
 }
 
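The AF_UNSPEC connect trick also exists in user space for datagram sockets: connecting with sa_family set to AF_UNSPEC dissolves the association while the local port stays bound (for TCP the kernel uses it internally, as the comment above explains). A short sketch:

#include <string.h>
#include <sys/socket.h>

int dissolve_association(int fd)
{
        struct sockaddr any;

        memset(&any, 0, sizeof(any));
        any.sa_family = AF_UNSPEC;
        return connect(fd, &any, sizeof(any));   /* 0 on success */
}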
 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
@@ -1852,12 +1802,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
-		struct sock_xprt *transport,
-		struct socket *(*create_sock)(struct rpc_xprt *,
-			struct sock_xprt *))
+static void xs_tcp_setup_socket(struct work_struct *work)
 {
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
 	struct socket *sock = transport->sock;
+	struct rpc_xprt *xprt = &transport->xprt;
 	int status = -EIO;
 
 	if (xprt->shutdown)
@@ -1865,7 +1815,8 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 
 	if (!sock) {
 		clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
-		sock = create_sock(xprt, transport);
+		sock = xs_create_sock(xprt, transport,
+				xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
 		if (IS_ERR(sock)) {
 			status = PTR_ERR(sock);
 			goto out;
@@ -1876,7 +1827,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 		abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
 				&xprt->state);
 		/* "close" the socket, preserving the local port */
-		xs_tcp_reuse_connection(xprt, transport);
+		xs_tcp_reuse_connection(transport);
 
 		if (abort_and_exit)
 			goto out_eagain;
@@ -1925,84 +1876,6 @@ out:
 	xprt_wake_pending_tasks(xprt, status);
 }
 
-static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
-		struct sock_xprt *transport)
-{
-	struct socket *sock;
-	int err;
-
-	/* start from scratch */
-	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create TCP transport socket (%d).\n",
-				-err);
-		goto out_err;
-	}
-	xs_reclassify_socket4(sock);
-
-	if (xs_bind4(transport, sock) < 0) {
-		sock_release(sock);
-		goto out_err;
-	}
-	return sock;
-out_err:
-	return ERR_PTR(-EIO);
-}
-
-/**
- * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker4(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-
-	xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
-}
-
-static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
-		struct sock_xprt *transport)
-{
-	struct socket *sock;
-	int err;
-
-	/* start from scratch */
-	err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
-	if (err < 0) {
-		dprintk("RPC: can't create TCP transport socket (%d).\n",
-				-err);
-		goto out_err;
-	}
-	xs_reclassify_socket6(sock);
-
-	if (xs_bind6(transport, sock) < 0) {
-		sock_release(sock);
-		goto out_err;
-	}
-	return sock;
-out_err:
-	return ERR_PTR(-EIO);
-}
-
-/**
- * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
- *
- * Invoked by a work queue tasklet.
- */
-static void xs_tcp_connect_worker6(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, connect_worker.work);
-	struct rpc_xprt *xprt = &transport->xprt;
-
-	xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
-}
-
 /**
  * xs_connect - connect a socket to a remote endpoint
  * @task: address of RPC task that manages state of connect request
@@ -2262,6 +2135,31 @@ static struct rpc_xprt_ops bc_tcp_ops = {
 	.print_stats		= xs_tcp_print_stats,
 };
 
+static int xs_init_anyaddr(const int family, struct sockaddr *sap)
+{
+	static const struct sockaddr_in sin = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_ANY),
+	};
+	static const struct sockaddr_in6 sin6 = {
+		.sin6_family = AF_INET6,
+		.sin6_addr = IN6ADDR_ANY_INIT,
+	};
+
+	switch (family) {
+	case AF_INET:
+		memcpy(sap, &sin, sizeof(sin));
+		break;
+	case AF_INET6:
+		memcpy(sap, &sin6, sizeof(sin6));
+		break;
+	default:
+		dprintk("RPC: %s: Bad address family\n", __func__);
+		return -EAFNOSUPPORT;
+	}
+	return 0;
+}
+
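For comparison, a userspace counterpart of xs_init_anyaddr() fills the wildcard address the same way; here in6addr_any plays the role the kernel's IN6ADDR_ANY_INIT plays above:

#include <errno.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>

int init_anyaddr(int family, struct sockaddr_storage *ss)
{
        memset(ss, 0, sizeof(*ss));
        switch (family) {
        case AF_INET: {
                struct sockaddr_in *sin = (struct sockaddr_in *)ss;

                sin->sin_family = AF_INET;
                sin->sin_addr.s_addr = htonl(INADDR_ANY);
                return 0;
        }
        case AF_INET6: {
                struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

                sin6->sin6_family = AF_INET6;
                sin6->sin6_addr = in6addr_any;
                return 0;
        }
        default:
                return -EAFNOSUPPORT;
        }
}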
 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 		unsigned int slot_table_size)
 {
@@ -2273,27 +2171,25 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 		return ERR_PTR(-EBADF);
 	}
 
-	new = kzalloc(sizeof(*new), GFP_KERNEL);
-	if (new == NULL) {
+	xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
+	if (xprt == NULL) {
 		dprintk("RPC: xs_setup_xprt: couldn't allocate "
 				"rpc_xprt\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	xprt = &new->xprt;
-
-	xprt->max_reqs = slot_table_size;
-	xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
-	if (xprt->slot == NULL) {
-		kfree(xprt);
-		dprintk("RPC: xs_setup_xprt: couldn't allocate slot "
-				"table\n");
-		return ERR_PTR(-ENOMEM);
-	}
 
+	new = container_of(xprt, struct sock_xprt, xprt);
 	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
 	xprt->addrlen = args->addrlen;
 	if (args->srcaddr)
 		memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
+	else {
+		int err;
+		err = xs_init_anyaddr(args->dstaddr->sa_family,
+					(struct sockaddr *)&new->srcaddr);
+		if (err != 0)
+			return ERR_PTR(err);
+	}
 
 	return xprt;
 }
@@ -2341,7 +2237,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_connect_worker4);
+					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
 		break;
 	case AF_INET6:
@@ -2349,7 +2245,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_udp_connect_worker6);
+					xs_udp_setup_socket);
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
 		break;
 	default:
@@ -2371,8 +2267,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 	return xprt;
 	ret = ERR_PTR(-EINVAL);
 out_err:
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	return ret;
 }
 
@@ -2416,7 +2311,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_connect_worker4);
+					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
 		break;
 	case AF_INET6:
@@ -2424,7 +2319,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 		xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
-					xs_tcp_connect_worker6);
+					xs_tcp_setup_socket);
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
 		break;
 	default:
@@ -2447,8 +2342,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	return xprt;
 	ret = ERR_PTR(-EINVAL);
 out_err:
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	return ret;
 }
 
@@ -2507,15 +2401,10 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 		goto out_err;
 	}
 
-	if (xprt_bound(xprt))
-		dprintk("RPC: set up xprt to %s (port %s) via %s\n",
-				xprt->address_strings[RPC_DISPLAY_ADDR],
-				xprt->address_strings[RPC_DISPLAY_PORT],
-				xprt->address_strings[RPC_DISPLAY_PROTO]);
-	else
-		dprintk("RPC: set up xprt to %s (autobind) via %s\n",
-				xprt->address_strings[RPC_DISPLAY_ADDR],
-				xprt->address_strings[RPC_DISPLAY_PROTO]);
+	dprintk("RPC: set up xprt to %s (port %s) via %s\n",
+			xprt->address_strings[RPC_DISPLAY_ADDR],
+			xprt->address_strings[RPC_DISPLAY_PORT],
+			xprt->address_strings[RPC_DISPLAY_PROTO]);
 
 	/*
 	 * Since we don't want connections for the backchannel, we set
@@ -2528,8 +2417,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	return xprt;
 	ret = ERR_PTR(-EINVAL);
 out_err:
-	kfree(xprt->slot);
-	kfree(xprt);
+	xprt_free(xprt);
 	return ret;
 }
 
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index c048543ffbeb..8a2e89bffde5 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -41,11 +41,6 @@
 #include "cluster.h"
 #include "net.h"
 
-u32 tipc_get_addr(void)
-{
-	return tipc_own_addr;
-}
-
 /**
  * tipc_addr_domain_valid - validates a network domain address
  *
@@ -89,7 +84,7 @@ int tipc_addr_domain_valid(u32 addr)
 
 int tipc_addr_node_valid(u32 addr)
 {
-	return (tipc_addr_domain_valid(addr) && tipc_node(addr));
+	return tipc_addr_domain_valid(addr) && tipc_node(addr);
 }
 
 int tipc_in_scope(u32 domain, u32 addr)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a008c6689305..22a60fc98392 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -121,6 +121,9 @@ static DEFINE_SPINLOCK(bc_lock);
 
 const char tipc_bclink_name[] = "broadcast-link";
 
+static void tipc_nmap_diff(struct tipc_node_map *nm_a,
+			   struct tipc_node_map *nm_b,
+			   struct tipc_node_map *nm_diff);
 
 static u32 buf_seqno(struct sk_buff *buf)
 {
@@ -143,6 +146,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
 }
 
 
+static void bclink_set_last_sent(void)
+{
+	if (bcl->next_out)
+		bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1);
+	else
+		bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
+}
+
+u32 tipc_bclink_get_last_sent(void)
+{
+	return bcl->fsm_msg_cnt;
+}
+
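bclink_set_last_sent() keeps the broadcast link's last-sent sequence number current whether or not packets are still queued in next_out, so readers no longer recompute it on every query. A standalone sketch of the "one before next" computation, assuming mod() is a 16-bit sequence wrap (an assumption here; see the TIPC headers for the real macro):

#include <stdio.h>

#define mod(x) ((x) & 0xffffu)   /* assumed 16-bit sequence space */

static unsigned int next_out_no = 0x10000;  /* next sequence number to use */

static unsigned int last_sent(void)
{
        /* nothing queued: last sent is the one before next_out_no */
        return mod(next_out_no - 1);        /* 0xffff here */
}

int main(void)
{
        printf("last sent = %#x\n", last_sent());
        return 0;
}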
 /**
  * bclink_set_gap - set gap according to contents of current deferred pkt queue
  *
@@ -171,7 +187,7 @@ static void bclink_set_gap(struct tipc_node *n_ptr)
 
 static int bclink_ack_allowed(u32 n)
 {
-	return((n % TIPC_MIN_LINK_WIN) == tipc_own_tag);
+	return (n % TIPC_MIN_LINK_WIN) == tipc_own_tag;
 }
 
 
@@ -237,8 +253,10 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
 
 	/* Try resolving broadcast link congestion, if necessary */
 
-	if (unlikely(bcl->next_out))
+	if (unlikely(bcl->next_out)) {
 		tipc_link_push_queue(bcl);
+		bclink_set_last_sent();
+	}
 	if (unlikely(released && !list_empty(&bcl->waiting_ports)))
 		tipc_link_wakeup_ports(bcl, 0);
 	spin_unlock_bh(&bc_lock);
@@ -272,7 +290,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
 	if (!less(n_ptr->bclink.gap_after, n_ptr->bclink.gap_to))
 		return;
 
-	buf = buf_acquire(INT_H_SIZE);
+	buf = tipc_buf_acquire(INT_H_SIZE);
 	if (buf) {
 		msg = buf_msg(buf);
 		tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
@@ -395,7 +413,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
 	if (unlikely(res == -ELINKCONG))
 		buf_discard(buf);
 	else
-		bcl->stats.sent_info++;
+		bclink_set_last_sent();
 
 	if (bcl->out_queue_size > bcl->stats.max_queue_sz)
 		bcl->stats.max_queue_sz = bcl->out_queue_size;
@@ -529,15 +547,6 @@ receive:
 	tipc_node_unlock(node);
 }
 
-u32 tipc_bclink_get_last_sent(void)
-{
-	u32 last_sent = mod(bcl->next_out_no - 1);
-
-	if (bcl->next_out)
-		last_sent = mod(buf_seqno(bcl->next_out) - 1);
-	return last_sent;
-}
-
 u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
 {
 	return (n_ptr->bclink.supported &&
@@ -570,6 +579,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 		msg = buf_msg(buf);
 		msg_set_non_seq(msg, 1);
 		msg_set_mc_netid(msg, tipc_net_id);
+		bcl->stats.sent_info++;
 	}
 
 	/* Send buffer over bearers until all targets reached */
@@ -609,11 +619,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 		bcbearer->remains = bcbearer->remains_new;
 	}
 
-	/* Unable to reach all targets */
+	/*
+	 * Unable to reach all targets (indicate success, since currently
+	 * there isn't code in place to properly block & unblock the
+	 * pseudo-bearer used by the broadcast link)
+	 */
 
-	bcbearer->bearer.publ.blocked = 1;
-	bcl->stats.bearer_congs++;
-	return 1;
+	return TIPC_OK;
 }
 
 /**
@@ -862,8 +874,9 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
  * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
  */
 
-void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
-		    struct tipc_node_map *nm_diff)
+static void tipc_nmap_diff(struct tipc_node_map *nm_a,
+			   struct tipc_node_map *nm_b,
+			   struct tipc_node_map *nm_diff)
 {
 	int stop = ARRAY_SIZE(nm_a->map);
 	int w;
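tipc_nmap_diff() computes the set difference described in the comment above (nodes of A that are not in B) word by word over the bitmaps. A sketch of that operation with illustrative field names, not TIPC's actual structure layout:

#include <stdio.h>

#define MAP_WORDS 8   /* assumed map size */

struct node_map {
        unsigned int count;
        unsigned int map[MAP_WORDS];
};

static void nmap_diff(const struct node_map *a, const struct node_map *b,
                      struct node_map *diff)
{
        unsigned int w, bits;

        diff->count = 0;
        for (w = 0; w < MAP_WORDS; w++) {
                diff->map[w] = a->map[w] & ~b->map[w];   /* A and-not B */
                for (bits = diff->map[w]; bits != 0; bits &= bits - 1)
                        diff->count++;                   /* count members */
        }
}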
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index e8c2b81658c7..011c03f0a4ab 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -84,9 +84,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m
 	return !memcmp(nm_a, nm_b, sizeof(*nm_a));
 }
 
-void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
-		    struct tipc_node_map *nm_diff);
-
 void tipc_port_list_add(struct port_list *pl_ptr, u32 port);
 void tipc_port_list_free(struct port_list *pl_ptr);
 
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 52ae17b2583e..9927d1d56c4f 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -63,7 +63,7 @@ static int media_name_valid(const char *name)
 	len = strlen(name);
 	if ((len + 1) > TIPC_MAX_MEDIA_NAME)
 		return 0;
-	return (strspn(name, tipc_alphabet) == len);
+	return strspn(name, tipc_alphabet) == len;
 }
 
 /**
@@ -288,9 +288,6 @@ static struct bearer *bearer_find(const char *name)
 	struct bearer *b_ptr;
 	u32 i;
 
-	if (tipc_mode != TIPC_NET_MODE)
-		return NULL;
-
 	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
 		if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
 			return b_ptr;
@@ -559,8 +556,6 @@ restart:
 	}
 
 	b_ptr = &tipc_bearers[bearer_id];
-	memset(b_ptr, 0, sizeof(struct bearer));
-
 	strcpy(b_ptr->publ.name, name);
 	res = m_ptr->enable_bearer(&b_ptr->publ);
 	if (res) {
@@ -630,30 +625,17 @@ int tipc_block_bearer(const char *name)
  * Note: This routine assumes caller holds tipc_net_lock.
 */
 
-static int bearer_disable(const char *name)
+static int bearer_disable(struct bearer *b_ptr)
 {
-	struct bearer *b_ptr;
 	struct link *l_ptr;
 	struct link *temp_l_ptr;
 
-	b_ptr = bearer_find(name);
-	if (!b_ptr) {
-		warn("Attempt to disable unknown bearer <%s>\n", name);
-		return -EINVAL;
-	}
-
-	info("Disabling bearer <%s>\n", name);
+	info("Disabling bearer <%s>\n", b_ptr->publ.name);
 	tipc_disc_stop_link_req(b_ptr->link_req);
 	spin_lock_bh(&b_ptr->publ.lock);
 	b_ptr->link_req = NULL;
 	b_ptr->publ.blocked = 1;
-	if (b_ptr->media->disable_bearer) {
-		spin_unlock_bh(&b_ptr->publ.lock);
-		write_unlock_bh(&tipc_net_lock);
-		b_ptr->media->disable_bearer(&b_ptr->publ);
-		write_lock_bh(&tipc_net_lock);
-		spin_lock_bh(&b_ptr->publ.lock);
-	}
+	b_ptr->media->disable_bearer(&b_ptr->publ);
 	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
 		tipc_link_delete(l_ptr);
 	}
@@ -664,10 +646,16 @@ static int bearer_disable(const char *name)
 
 int tipc_disable_bearer(const char *name)
 {
+	struct bearer *b_ptr;
 	int res;
 
 	write_lock_bh(&tipc_net_lock);
-	res = bearer_disable(name);
+	b_ptr = bearer_find(name);
+	if (b_ptr == NULL) {
+		warn("Attempt to disable unknown bearer <%s>\n", name);
+		res = -EINVAL;
+	} else
+		res = bearer_disable(b_ptr);
 	write_unlock_bh(&tipc_net_lock);
 	return res;
 }
@@ -680,13 +668,7 @@ void tipc_bearer_stop(void)
 
 	for (i = 0; i < MAX_BEARERS; i++) {
 		if (tipc_bearers[i].active)
-			tipc_bearers[i].publ.blocked = 1;
-	}
-	for (i = 0; i < MAX_BEARERS; i++) {
-		if (tipc_bearers[i].active)
-			bearer_disable(tipc_bearers[i].publ.name);
+			bearer_disable(&tipc_bearers[i]);
 	}
 	media_count = 0;
 }
-
-
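The refactor moves the name lookup out of bearer_disable() so callers that already hold a bearer pointer, such as tipc_bearer_stop(), can skip it. A simplified sketch of the resulting lookup-then-operate pattern, using a pthread rwlock in place of tipc_net_lock and invented structure names:

#include <pthread.h>
#include <string.h>

#define MAX_BEARERS 2

struct bearer_like { int active; char name[16]; };

static struct bearer_like bearers[MAX_BEARERS];
static pthread_rwlock_t net_lock = PTHREAD_RWLOCK_INITIALIZER;

static void bearer_disable(struct bearer_like *b)  /* caller holds net_lock */
{
        b->active = 0;
}

int disable_bearer_by_name(const char *name)
{
        int i, res = -1;

        pthread_rwlock_wrlock(&net_lock);
        for (i = 0; i < MAX_BEARERS; i++)
                if (bearers[i].active && !strcmp(bearers[i].name, name)) {
                        bearer_disable(&bearers[i]);   /* pointer, not name */
                        res = 0;
                        break;
                }
        pthread_rwlock_unlock(&net_lock);
        return res;
}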
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index e68f705381bc..7fea14b98b97 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -113,25 +113,6 @@ void tipc_cltr_delete(struct cluster *c_ptr)
 	kfree(c_ptr);
 }
 
-u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr)
-{
-	struct tipc_node *n_ptr;
-	u32 n_num = tipc_node(addr) + 1;
-
-	if (!c_ptr)
-		return addr;
-	for (; n_num <= c_ptr->highest_node; n_num++) {
-		n_ptr = c_ptr->nodes[n_num];
-		if (n_ptr && tipc_node_has_active_links(n_ptr))
-			return n_ptr->addr;
-	}
-	for (n_num = 1; n_num < tipc_node(addr); n_num++) {
-		n_ptr = c_ptr->nodes[n_num];
-		if (n_ptr && tipc_node_has_active_links(n_ptr))
-			return n_ptr->addr;
-	}
-	return 0;
-}
 
 void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
 {
@@ -232,7 +213,7 @@ struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector)
 static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
 {
 	u32 size = INT_H_SIZE + data_size;
-	struct sk_buff *buf = buf_acquire(size);
+	struct sk_buff *buf = tipc_buf_acquire(size);
 	struct tipc_msg *msg;
 
 	if (buf) {
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index 333efb0b9c44..32636d98c9c6 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -75,7 +75,7 @@ void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
 void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest);
 void tipc_cltr_broadcast(struct sk_buff *buf);
 int tipc_cltr_init(void);
-u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr);
+
 void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
 void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest);
 void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 961d1b097146..50a6133a3668 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -95,7 +95,7 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
 	return 1;
 }
 
-struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
+static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
 {
 	struct sk_buff *buf;
 	__be32 value_net;
@@ -109,6 +109,11 @@ struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
 	return buf;
 }
 
+static struct sk_buff *tipc_cfg_reply_unsigned(u32 value)
+{
+	return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value);
+}
+
 struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
 {
 	struct sk_buff *buf;
@@ -120,139 +125,6 @@ struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
 	return buf;
 }
 
-
-#if 0
-
-/* Now obsolete code for handling commands not yet implemented the new way */
-
-/*
- * Some of this code assumed that the manager structure contains two added
- * fields:
- *	u32 link_subscriptions;
- *	struct list_head link_subscribers;
- * which are currently not present. These fields may need to be re-introduced
- * if and when support for link subscriptions is added.
- */
-
-void tipc_cfg_link_event(u32 addr, char *name, int up)
-{
-	/* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */
-}
-
-int tipc_cfg_cmd(const struct tipc_cmd_msg * msg,
-		 char *data,
-		 u32 sz,
-		 u32 *ret_size,
-		 struct tipc_portid *orig)
-{
-	int rv = -EINVAL;
-	u32 cmd = msg->cmd;
-
-	*ret_size = 0;
-	switch (cmd) {
-	case TIPC_REMOVE_LINK:
-	case TIPC_CMD_BLOCK_LINK:
-	case TIPC_CMD_UNBLOCK_LINK:
-		if (!cfg_check_connection(orig))
-			rv = link_control(msg->argv.link_name, msg->cmd, 0);
-		break;
-	case TIPC_ESTABLISH:
-	{
-		int connected;
-
-		tipc_isconnected(mng.conn_port_ref, &connected);
-		if (connected || !orig) {
-			rv = TIPC_FAILURE;
-			break;
-		}
-		rv = tipc_connect2port(mng.conn_port_ref, orig);
-		if (rv == TIPC_OK)
-			orig = 0;
-		break;
-	}
-	case TIPC_GET_PEER_ADDRESS:
-		*ret_size = link_peer_addr(msg->argv.link_name, data, sz);
-		break;
-	case TIPC_GET_ROUTES:
-		rv = TIPC_OK;
-		break;
-	default: {}
-	}
-	if (*ret_size)
-		rv = TIPC_OK;
-	return rv;
-}
-
-static void cfg_cmd_event(struct tipc_cmd_msg *msg,
-			  char *data,
-			  u32 sz,
-			  struct tipc_portid const *orig)
-{
-	int rv = -EINVAL;
-	struct tipc_cmd_result_msg rmsg;
-	struct iovec msg_sect[2];
-	int *arg;
-
-	msg->cmd = ntohl(msg->cmd);
-
-	cfg_prepare_res_msg(msg->cmd, msg->usr_handle, rv, &rmsg, msg_sect,
-			    data, 0);
-	if (ntohl(msg->magic) != TIPC_MAGIC)
-		goto exit;
-
-	switch (msg->cmd) {
-	case TIPC_CREATE_LINK:
-		if (!cfg_check_connection(orig))
-			rv = disc_create_link(&msg->argv.create_link);
-		break;
-	case TIPC_LINK_SUBSCRIBE:
-	{
-		struct subscr_data *sub;
-
-		if (mng.link_subscriptions > 64)
-			break;
-		sub = kmalloc(sizeof(*sub),
-			      GFP_ATOMIC);
-		if (sub == NULL) {
-			warn("Memory squeeze; dropped remote link subscription\n");
-			break;
-		}
-		INIT_LIST_HEAD(&sub->subd_list);
-		tipc_createport(mng.user_ref,
-				(void *)sub,
-				TIPC_HIGH_IMPORTANCE,
-				0,
-				0,
-				(tipc_conn_shutdown_event)cfg_linksubscr_cancel,
-				0,
-				0,
-				(tipc_conn_msg_event)cfg_linksubscr_cancel,
-				0,
-				&sub->port_ref);
-		if (!sub->port_ref) {
-			kfree(sub);
-			break;
-		}
-		memcpy(sub->usr_handle,msg->usr_handle,
-		       sizeof(sub->usr_handle));
-		sub->domain = msg->argv.domain;
-		list_add_tail(&sub->subd_list, &mng.link_subscribers);
-		tipc_connect2port(sub->port_ref, orig);
-		rmsg.retval = TIPC_OK;
-		tipc_send(sub->port_ref, 2u, msg_sect);
-		mng.link_subscriptions++;
-		return;
-	}
-	default:
-		rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig);
-	}
-exit:
-	rmsg.result_len = htonl(msg_sect[1].iov_len);
-	rmsg.retval = htonl(rv);
-	tipc_cfg_respond(msg_sect, 2u, orig);
-}
-#endif
-
 #define MAX_STATS_INFO 2000
 
 static struct sk_buff *tipc_show_stats(void)
@@ -557,14 +429,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SHOW_PORTS:
 		rep_tlv_buf = tipc_port_get_ports();
 		break;
-#if 0
-	case TIPC_CMD_SHOW_PORT_STATS:
-		rep_tlv_buf = port_show_stats(req_tlv_area, req_tlv_space);
-		break;
-	case TIPC_CMD_RESET_PORT_STATS:
-		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED);
-		break;
-#endif
 	case TIPC_CMD_SET_LOG_SIZE:
 		rep_tlv_buf = tipc_log_resize_cmd(req_tlv_area, req_tlv_space);
 		break;
diff --git a/net/tipc/config.h b/net/tipc/config.h
index 5cd7cc56c54d..481e12ece715 100644
--- a/net/tipc/config.h
+++ b/net/tipc/config.h
@@ -45,7 +45,6 @@
 struct sk_buff *tipc_cfg_reply_alloc(int payload_size);
 int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
 			void *tlv_data, int tlv_data_size);
-struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value);
 struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string);
 
 static inline struct sk_buff *tipc_cfg_reply_none(void)
@@ -53,11 +52,6 @@ static inline struct sk_buff *tipc_cfg_reply_none(void)
 	return tipc_cfg_reply_alloc(0);
 }
 
-static inline struct sk_buff *tipc_cfg_reply_unsigned(u32 value)
-{
-	return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value);
-}
-
 static inline struct sk_buff *tipc_cfg_reply_error_string(char *string)
 {
 	return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 696468117985..e2a09eb8efd4 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -96,13 +96,8 @@ int tipc_net_id;
 int tipc_remote_management;
 
 
-int tipc_get_mode(void)
-{
-	return tipc_mode;
-}
-
 /**
- * buf_acquire - creates a TIPC message buffer
+ * tipc_buf_acquire - creates a TIPC message buffer
  * @size: message size (including TIPC header)
  *
@@ -111,7 +106,7 @@ int tipc_get_mode(void)
  * There may also be unrequested tailroom present at the buffer's end.
 */
 
-struct sk_buff *buf_acquire(u32 size)
+struct sk_buff *tipc_buf_acquire(u32 size)
 {
 	struct sk_buff *skb;
 	unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
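tipc_buf_acquire() rounds the requested size up to a 4-byte boundary with the (n + 3) & ~3u mask, which works for any power-of-two alignment. A standalone check of the idiom:

#include <assert.h>

static unsigned int round_up4(unsigned int n)
{
        return (n + 3) & ~3u;   /* valid because 4 is a power of two */
}

int main(void)
{
        assert(round_up4(0) == 0);
        assert(round_up4(1) == 4);
        assert(round_up4(4) == 4);
        assert(round_up4(5) == 8);
        return 0;
}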
@@ -129,7 +124,7 @@ struct sk_buff *buf_acquire(u32 size)
  * tipc_core_stop_net - shut down TIPC networking sub-systems
 */
 
-void tipc_core_stop_net(void)
+static void tipc_core_stop_net(void)
 {
 	tipc_eth_media_stop();
 	tipc_net_stop();
@@ -154,7 +149,7 @@ int tipc_core_start_net(unsigned long addr)
  * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode
 */
 
-void tipc_core_stop(void)
+static void tipc_core_stop(void)
 {
 	if (tipc_mode != TIPC_NODE_MODE)
 		return;
@@ -169,13 +164,14 @@ void tipc_core_stop(void)
 	tipc_nametbl_stop();
 	tipc_ref_table_stop();
 	tipc_socket_stop();
+	tipc_log_resize(0);
 }
 
 /**
  * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode
 */
 
-int tipc_core_start(void)
+static int tipc_core_start(void)
 {
 	int res;
 
@@ -203,7 +199,9 @@ static int __init tipc_init(void)
 {
 	int res;
 
-	tipc_log_resize(CONFIG_TIPC_LOG);
+	if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
+		warn("Unable to create log buffer\n");
+
 	info("Activated (version " TIPC_MOD_VER
 	     " compiled " __DATE__ " " __TIME__ ")\n");
 
@@ -230,7 +228,6 @@ static void __exit tipc_exit(void)
 	tipc_core_stop_net();
 	tipc_core_stop();
 	info("Deactivated\n");
-	tipc_log_resize(0);
 }
 
 module_init(tipc_init);
@@ -244,8 +241,6 @@ MODULE_VERSION(TIPC_MOD_VER);
 
 EXPORT_SYMBOL(tipc_attach);
 EXPORT_SYMBOL(tipc_detach);
-EXPORT_SYMBOL(tipc_get_addr);
-EXPORT_SYMBOL(tipc_get_mode);
 EXPORT_SYMBOL(tipc_createport);
 EXPORT_SYMBOL(tipc_deleteport);
 EXPORT_SYMBOL(tipc_ownidentity);
@@ -260,23 +255,10 @@ EXPORT_SYMBOL(tipc_withdraw);
 EXPORT_SYMBOL(tipc_connect2port);
 EXPORT_SYMBOL(tipc_disconnect);
 EXPORT_SYMBOL(tipc_shutdown);
-EXPORT_SYMBOL(tipc_isconnected);
-EXPORT_SYMBOL(tipc_peer);
-EXPORT_SYMBOL(tipc_ref_valid);
 EXPORT_SYMBOL(tipc_send);
-EXPORT_SYMBOL(tipc_send_buf);
 EXPORT_SYMBOL(tipc_send2name);
-EXPORT_SYMBOL(tipc_forward2name);
-EXPORT_SYMBOL(tipc_send_buf2name);
-EXPORT_SYMBOL(tipc_forward_buf2name);
 EXPORT_SYMBOL(tipc_send2port);
-EXPORT_SYMBOL(tipc_forward2port);
-EXPORT_SYMBOL(tipc_send_buf2port);
-EXPORT_SYMBOL(tipc_forward_buf2port);
 EXPORT_SYMBOL(tipc_multicast);
-/* EXPORT_SYMBOL(tipc_multicast_buf); not available yet */
-EXPORT_SYMBOL(tipc_ispublished);
-EXPORT_SYMBOL(tipc_available_nodes);
 
 /* TIPC API for external bearers (see tipc_bearer.h) */
 
@@ -293,6 +275,4 @@ EXPORT_SYMBOL(tipc_createport_raw);
 EXPORT_SYMBOL(tipc_reject_msg);
 EXPORT_SYMBOL(tipc_send_buf_fast);
 EXPORT_SYMBOL(tipc_acknowledge);
-EXPORT_SYMBOL(tipc_get_port);
-EXPORT_SYMBOL(tipc_get_handle);
 
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 188799017abd..e19389e57227 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -83,9 +83,7 @@
  * Note: TIPC_LOG is configured to echo its output to the system console;
  * user-defined buffers can be configured to do the same thing.
 */
-
 extern struct print_buf *const TIPC_NULL;
-extern struct print_buf *const TIPC_CONS;
 extern struct print_buf *const TIPC_LOG;
 
 void tipc_printf(struct print_buf *, const char *fmt, ...);
@@ -204,10 +202,7 @@ extern atomic_t tipc_user_count;
  * Routines available to privileged subsystems
 */
 
-extern int tipc_core_start(void);
-extern void tipc_core_stop(void);
-extern int tipc_core_start_net(unsigned long addr);
-extern void tipc_core_stop_net(void);
+extern int tipc_core_start_net(unsigned long);
 extern int tipc_handler_start(void);
 extern void tipc_handler_stop(void);
 extern int tipc_netlink_start(void);
@@ -328,7 +323,7 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
 	return (struct tipc_msg *)skb->data;
 }
 
-extern struct sk_buff *buf_acquire(u32 size);
+extern struct sk_buff *tipc_buf_acquire(u32 size);
 
 /**
  * buf_discard - frees a TIPC message buffer
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 1885a7edb0c8..46f51d208e5e 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -52,7 +52,7 @@ static struct print_buf null_buf = { NULL, 0, NULL, 0 };
 struct print_buf *const TIPC_NULL = &null_buf;
 
 static struct print_buf cons_buf = { NULL, 0, NULL, 1 };
-struct print_buf *const TIPC_CONS = &cons_buf;
+static struct print_buf *const TIPC_CONS = &cons_buf;
 
 static struct print_buf log_buf = { NULL, 0, NULL, 1 };
 struct print_buf *const TIPC_LOG = &log_buf;
@@ -76,6 +76,10 @@ struct print_buf *const TIPC_LOG = &log_buf;
 static char print_string[TIPC_PB_MAX_STR];
 static DEFINE_SPINLOCK(print_lock);
 
+static void tipc_printbuf_reset(struct print_buf *pb);
+static int tipc_printbuf_empty(struct print_buf *pb);
+static void tipc_printbuf_move(struct print_buf *pb_to,
+			       struct print_buf *pb_from);
 
 #define FORMAT(PTR,LEN,FMT) \
 {\
@@ -116,7 +120,7 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
  * @pb: pointer to print buffer structure
 */
 
-void tipc_printbuf_reset(struct print_buf *pb)
+static void tipc_printbuf_reset(struct print_buf *pb)
 {
 	if (pb->buf) {
 		pb->crs = pb->buf;
@@ -132,9 +136,9 @@ void tipc_printbuf_reset(struct print_buf *pb)
  * Returns non-zero if print buffer is empty.
 */
 
-int tipc_printbuf_empty(struct print_buf *pb)
+static int tipc_printbuf_empty(struct print_buf *pb)
 {
-	return (!pb->buf || (pb->crs == pb->buf));
+	return !pb->buf || (pb->crs == pb->buf);
 }
 
 /**
@@ -169,7 +173,7 @@ int tipc_printbuf_validate(struct print_buf *pb)
 			tipc_printf(pb, err);
 		}
 	}
-	return (pb->crs - pb->buf + 1);
+	return pb->crs - pb->buf + 1;
 }
 
 /**
@@ -181,7 +185,8 @@ int tipc_printbuf_validate(struct print_buf *pb)
  * Source print buffer becomes empty if a successful move occurs.
 */
 
-void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from)
+static void tipc_printbuf_move(struct print_buf *pb_to,
+			       struct print_buf *pb_from)
 {
 	int len;
 
diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h
index 5ef1bc8f64ef..3ba6ba8b434a 100644
--- a/net/tipc/dbg.h
+++ b/net/tipc/dbg.h
@@ -56,10 +56,7 @@ struct print_buf {
 #define TIPC_PB_MAX_STR 512	/* max printable string (with trailing NUL) */
 
 void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size);
-void tipc_printbuf_reset(struct print_buf *pb);
-int tipc_printbuf_empty(struct print_buf *pb);
 int tipc_printbuf_validate(struct print_buf *pb);
-void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from);
 
 int tipc_log_resize(int log_size);
 
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fc1fcf5e6b53..4a7cd3719b78 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -46,16 +46,6 @@
 #define TIPC_LINK_REQ_FAST 2000 /* normal delay if bearer has no links */
 #define TIPC_LINK_REQ_SLOW 600000 /* normal delay if bearer has links */
 
-#if 0
-#define GET_NODE_INFO 300
-#define GET_NODE_INFO_RESULT 301
-#define FORWARD_LINK_PROBE 302
-#define LINK_REQUEST_REJECTED 303
-#define LINK_REQUEST_ACCEPTED 304
-#define DROP_LINK_REQUEST 305
-#define CHECK_LINK_COUNT 306
-#endif
-
 /*
  * TODO: Most of the inter-cluster setup stuff should be
  * rewritten, and be made conformant with specification.
@@ -78,30 +68,6 @@ struct link_req {
 	unsigned int timer_intv;
 };
 
-
-#if 0
-int disc_create_link(const struct tipc_link_create *argv)
-{
-	/*
-	 * Code for inter cluster link setup here
-	 */
-	return TIPC_OK;
-}
-#endif
-
-/*
- * disc_lost_link(): A link has lost contact
- */
-
-void tipc_disc_link_event(u32 addr, char *name, int up)
-{
-	if (in_own_cluster(addr))
-		return;
-	/*
-	 * Code for inter cluster link setup here
-	 */
-}
-
 /**
  * tipc_disc_init_msg - initialize a link setup message
  * @type: message type (request or response)
@@ -115,7 +81,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
 					  u32 dest_domain,
 					  struct bearer *b_ptr)
 {
-	struct sk_buff *buf = buf_acquire(DSC_H_SIZE);
+	struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE);
 	struct tipc_msg *msg;
 
 	if (buf) {
@@ -203,6 +169,14 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 		return;
 	}
 	spin_lock_bh(&n_ptr->lock);
+
+	/* Don't talk to neighbor during cleanup after last session */
+
+	if (n_ptr->cleanup_required) {
+		spin_unlock_bh(&n_ptr->lock);
+		return;
+	}
+
 	link = n_ptr->links[b_ptr->identity];
 	if (!link) {
 		dbg("creating link\n");
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index c36eaeb7d5d0..f8e750636123 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -50,9 +50,4 @@ void tipc_disc_stop_link_req(struct link_req *req);
 
 void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr);
 
-void tipc_disc_link_event(u32 addr, char *name, int up);
-#if 0
-int disc_create_link(const struct tipc_link_create *argv);
-#endif
-
 #endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6230d16020c4..6e988ba485fd 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -72,17 +72,26 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
 {
 	struct sk_buff *clone;
 	struct net_device *dev;
+	int delta;
 
 	clone = skb_clone(buf, GFP_ATOMIC);
-	if (clone) {
-		skb_reset_network_header(clone);
-		dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
-		clone->dev = dev;
-		dev_hard_header(clone, dev, ETH_P_TIPC,
-				&dest->dev_addr.eth_addr,
-				dev->dev_addr, clone->len);
-		dev_queue_xmit(clone);
+	if (!clone)
+		return 0;
+
+	dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
+	delta = dev->hard_header_len - skb_headroom(buf);
+
+	if ((delta > 0) &&
+	    pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+		kfree_skb(clone);
+		return 0;
 	}
+
+	skb_reset_network_header(clone);
+	clone->dev = dev;
+	dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr,
+			dev->dev_addr, clone->len);
+	dev_queue_xmit(clone);
 	return 0;
 }
 
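The rewritten send_msg() grows the clone's headroom when the device's link-layer header would not fit in front of the payload. The same ensure-headroom idea in a userspace sketch, with an invented buffer layout standing in for the sk_buff:

#include <stdlib.h>
#include <string.h>

struct buf {
        unsigned char *mem;    /* start of allocation */
        unsigned char *data;   /* start of payload */
        size_t len;            /* payload length */
};

/* Returns 0 on success; guarantees at least 'need' bytes before data. */
int ensure_headroom(struct buf *b, size_t need)
{
        size_t headroom = (size_t)(b->data - b->mem);
        unsigned char *mem;

        if (headroom >= need)
                return 0;                       /* nothing to do */

        mem = malloc(need + b->len);
        if (!mem)
                return -1;
        memcpy(mem + need, b->data, b->len);    /* shift payload right */
        free(b->mem);
        b->mem = mem;
        b->data = mem + need;
        return 0;
}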
@@ -92,15 +101,12 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
  * Accept only packets explicitly sent to this node, or broadcast packets;
  * ignores packets sent using Ethernet multicast, and traffic sent to other
  * nodes (which can happen if interface is running in promiscuous mode).
- * Routine truncates any Ethernet padding/CRC appended to the message,
- * and ensures message size matches actual length
 */
 
 static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 		    struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
-	u32 size;
 
 	if (!net_eq(dev_net(dev), &init_net)) {
 		kfree_skb(buf);
@@ -109,13 +115,9 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 
 	if (likely(eb_ptr->bearer)) {
 		if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
-			size = msg_size((struct tipc_msg *)buf->data);
-			skb_trim(buf, size);
-			if (likely(buf->len == size)) {
-				buf->next = NULL;
-				tipc_recv_msg(buf, eb_ptr->bearer);
-				return 0;
-			}
+			buf->next = NULL;
+			tipc_recv_msg(buf, eb_ptr->bearer);
+			return 0;
 		}
 	}
 	kfree_skb(buf);
@@ -133,6 +135,16 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
 	struct eth_bearer *eb_ptr = &eth_bearers[0];
 	struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
 	char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
+	int pending_dev = 0;
+
+	/* Find unused Ethernet bearer structure */
+
+	while (eb_ptr->dev) {
+		if (!eb_ptr->bearer)
+			pending_dev++;
+		if (++eb_ptr == stop)
+			return pending_dev ? -EAGAIN : -EDQUOT;
+	}
 
 	/* Find device with specified name */
 
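The new scan in enable_bearer() distinguishes a table that is merely draining (-EAGAIN, the caller may retry once detaching devices finish) from one that is genuinely full (-EDQUOT). A sketch of that capacity check with illustrative types:

#include <errno.h>
#include <stddef.h>

struct eth_bearer_like { void *dev; void *bearer; };

int find_free_slot(struct eth_bearer_like *tbl, size_t n,
                   struct eth_bearer_like **out)
{
        size_t i, pending = 0;

        for (i = 0; i < n; i++) {
                if (!tbl[i].dev) {
                        *out = &tbl[i];
                        return 0;            /* unused slot found */
                }
                if (!tbl[i].bearer)
                        pending++;           /* device still detaching */
        }
        return pending ? -EAGAIN : -EDQUOT;
}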
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a3616b99529b..b31992ccd5d3 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -99,23 +99,6 @@ struct link_name {
 	char if_peer[TIPC_MAX_IF_NAME];
 };
 
-#if 0
-
-/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
-
-/**
- * struct link_event - link up/down event notification
- */
-
-struct link_event {
-	u32 addr;
-	int up;
-	void (*fcn)(u32, char *, int);
-	char name[TIPC_MAX_LINK_NAME];
-};
-
-#endif
-
 static void link_handle_out_of_seq_msg(struct link *l_ptr,
 				       struct sk_buff *buf);
 static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
@@ -129,6 +112,9 @@ static void link_state_event(struct link *l_ptr, u32 event);
 static void link_reset_statistics(struct link *l_ptr);
 static void link_print(struct link *l_ptr, struct print_buf *buf,
 		       const char *str);
+static void link_start(struct link *l_ptr);
+static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf);
+
 
 /*
  * Debugging code used by link routines only
@@ -239,13 +225,13 @@ int tipc_link_is_up(struct link *l_ptr)
 {
 	if (!l_ptr)
 		return 0;
-	return (link_working_working(l_ptr) || link_working_unknown(l_ptr));
+	return link_working_working(l_ptr) || link_working_unknown(l_ptr);
 }
 
 int tipc_link_is_active(struct link *l_ptr)
 {
-	return ((l_ptr->owner->active_links[0] == l_ptr) ||
-		(l_ptr->owner->active_links[1] == l_ptr));
+	return (l_ptr->owner->active_links[0] == l_ptr) ||
+		(l_ptr->owner->active_links[1] == l_ptr);
 }
 
 /**
@@ -459,7 +445,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 
 	k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
 	list_add_tail(&l_ptr->link_list, &b_ptr->links);
-	tipc_k_signal((Handler)tipc_link_start, (unsigned long)l_ptr);
+	tipc_k_signal((Handler)link_start, (unsigned long)l_ptr);
 
 	dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n",
 	    l_ptr->tolerance, l_ptr->continuity_interval, l_ptr->abort_limit);
@@ -499,9 +485,9 @@ void tipc_link_delete(struct link *l_ptr)
 	kfree(l_ptr);
 }
 
-void tipc_link_start(struct link *l_ptr)
+static void link_start(struct link *l_ptr)
 {
-	dbg("tipc_link_start %x\n", l_ptr);
+	dbg("link_start %x\n", l_ptr);
 	link_state_event(l_ptr, STARTING_EVT);
 }
 
@@ -634,39 +620,9 @@ void tipc_link_stop(struct link *l_ptr)
 	l_ptr->proto_msg_queue = NULL;
 }
 
-#if 0
-
 /* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
-
-static void link_recv_event(struct link_event *ev)
-{
-	ev->fcn(ev->addr, ev->name, ev->up);
-	kfree(ev);
-}
-
-static void link_send_event(void (*fcn)(u32 a, char *n, int up),
-			    struct link *l_ptr, int up)
-{
-	struct link_event *ev;
-
-	ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
-	if (!ev) {
-		warn("Link event allocation failure\n");
-		return;
-	}
-	ev->addr = l_ptr->addr;
-	ev->up = up;
-	ev->fcn = fcn;
-	memcpy(ev->name, l_ptr->name, TIPC_MAX_LINK_NAME);
-	tipc_k_signal((Handler)link_recv_event, (unsigned long)ev);
-}
-
-#else
-
 #define link_send_event(fcn, l_ptr, up) do { } while (0)
 
-#endif
-
 void tipc_link_reset(struct link *l_ptr)
 {
 	struct sk_buff *buf;
@@ -690,10 +646,7 @@ void tipc_link_reset(struct link *l_ptr)
 
 	tipc_node_link_down(l_ptr->owner, l_ptr);
 	tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
-#if 0
-	tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name);
-	dbg_link_dump();
-#endif
+
 	if (was_active_link && tipc_node_has_active_links(l_ptr->owner) &&
 	    l_ptr->owner->permit_changeover) {
 		l_ptr->reset_checkpoint = checkpoint;
@@ -1050,7 +1003,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1050 /* Fragmentation needed ? */ 1003 /* Fragmentation needed ? */
1051 1004
1052 if (size > max_packet) 1005 if (size > max_packet)
1053 return tipc_link_send_long_buf(l_ptr, buf); 1006 return link_send_long_buf(l_ptr, buf);
1054 1007
1055 /* Packet can be queued or sent: */ 1008 /* Packet can be queued or sent: */
1056 1009
@@ -1086,7 +1039,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1086 /* Try creating a new bundle */ 1039 /* Try creating a new bundle */
1087 1040
1088 if (size <= max_packet * 2 / 3) { 1041 if (size <= max_packet * 2 / 3) {
1089 struct sk_buff *bundler = buf_acquire(max_packet); 1042 struct sk_buff *bundler = tipc_buf_acquire(max_packet);
1090 struct tipc_msg bundler_hdr; 1043 struct tipc_msg bundler_hdr;
1091 1044
1092 if (bundler) { 1045 if (bundler) {
@@ -1362,7 +1315,7 @@ again:
1362 1315
1363 /* Prepare header of first fragment: */ 1316 /* Prepare header of first fragment: */
1364 1317
1365 buf_chain = buf = buf_acquire(max_pkt); 1318 buf_chain = buf = tipc_buf_acquire(max_pkt);
1366 if (!buf) 1319 if (!buf)
1367 return -ENOMEM; 1320 return -ENOMEM;
1368 buf->next = NULL; 1321 buf->next = NULL;
@@ -1419,7 +1372,7 @@ error:
1419 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); 1372 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
1420 msg_set_fragm_no(&fragm_hdr, ++fragm_no); 1373 msg_set_fragm_no(&fragm_hdr, ++fragm_no);
1421 prev = buf; 1374 prev = buf;
1422 buf = buf_acquire(fragm_sz + INT_H_SIZE); 1375 buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
1423 if (!buf) 1376 if (!buf)
1424 goto error; 1377 goto error;
1425 1378
@@ -1802,6 +1755,15 @@ static int link_recv_buf_validate(struct sk_buff *buf)
1802 return pskb_may_pull(buf, hdr_size); 1755 return pskb_may_pull(buf, hdr_size);
1803} 1756}
1804 1757
1758/**
1759 * tipc_recv_msg - process TIPC messages arriving from off-node
1760 * @head: pointer to message buffer chain
1761 * @tb_ptr: pointer to bearer message arrived on
1762 *
1763 * Invoked with no locks held. Bearer pointer must point to a valid bearer
1764 * structure (i.e. cannot be NULL), but bearer can be inactive.
1765 */
1766
1805void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) 1767void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1806{ 1768{
1807 read_lock_bh(&tipc_net_lock); 1769 read_lock_bh(&tipc_net_lock);
@@ -1819,6 +1781,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1819 1781
1820 head = head->next; 1782 head = head->next;
1821 1783
1784 /* Ensure bearer is still enabled */
1785
1786 if (unlikely(!b_ptr->active))
1787 goto cont;
1788
1822 /* Ensure message is well-formed */ 1789 /* Ensure message is well-formed */
1823 1790
1824 if (unlikely(!link_recv_buf_validate(buf))) 1791 if (unlikely(!link_recv_buf_validate(buf)))
@@ -1855,13 +1822,22 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1855 goto cont; 1822 goto cont;
1856 } 1823 }
1857 1824
1858 /* Locate unicast link endpoint that should handle message */ 1825 /* Locate neighboring node that sent message */
1859 1826
1860 n_ptr = tipc_node_find(msg_prevnode(msg)); 1827 n_ptr = tipc_node_find(msg_prevnode(msg));
1861 if (unlikely(!n_ptr)) 1828 if (unlikely(!n_ptr))
1862 goto cont; 1829 goto cont;
1863 tipc_node_lock(n_ptr); 1830 tipc_node_lock(n_ptr);
1864 1831
1832 /* Don't talk to neighbor during cleanup after last session */
1833
1834 if (n_ptr->cleanup_required) {
1835 tipc_node_unlock(n_ptr);
1836 goto cont;
1837 }
1838
1839 /* Locate unicast link endpoint that should handle message */
1840
1865 l_ptr = n_ptr->links[b_ptr->identity]; 1841 l_ptr = n_ptr->links[b_ptr->identity];
1866 if (unlikely(!l_ptr)) { 1842 if (unlikely(!l_ptr)) {
1867 tipc_node_unlock(n_ptr); 1843 tipc_node_unlock(n_ptr);
@@ -2172,7 +2148,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
2172 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { 2148 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
2173 if (!l_ptr->proto_msg_queue) { 2149 if (!l_ptr->proto_msg_queue) {
2174 l_ptr->proto_msg_queue = 2150 l_ptr->proto_msg_queue =
2175 buf_acquire(sizeof(l_ptr->proto_msg)); 2151 tipc_buf_acquire(sizeof(l_ptr->proto_msg));
2176 } 2152 }
2177 buf = l_ptr->proto_msg_queue; 2153 buf = l_ptr->proto_msg_queue;
2178 if (!buf) 2154 if (!buf)
@@ -2186,7 +2162,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
2186 2162
2187 msg_dbg(msg, ">>"); 2163 msg_dbg(msg, ">>");
2188 2164
2189 buf = buf_acquire(msg_size); 2165 buf = tipc_buf_acquire(msg_size);
2190 if (!buf) 2166 if (!buf)
2191 return; 2167 return;
2192 2168
@@ -2345,10 +2321,10 @@ exit:
2345 * tipc_link_tunnel(): Send one message via a link belonging to 2321 * tipc_link_tunnel(): Send one message via a link belonging to
2346 * another bearer. Owner node is locked. 2322 * another bearer. Owner node is locked.
2347 */ 2323 */
2348void tipc_link_tunnel(struct link *l_ptr, 2324static void tipc_link_tunnel(struct link *l_ptr,
2349 struct tipc_msg *tunnel_hdr, 2325 struct tipc_msg *tunnel_hdr,
2350 struct tipc_msg *msg, 2326 struct tipc_msg *msg,
2351 u32 selector) 2327 u32 selector)
2352{ 2328{
2353 struct link *tunnel; 2329 struct link *tunnel;
2354 struct sk_buff *buf; 2330 struct sk_buff *buf;
@@ -2361,7 +2337,7 @@ void tipc_link_tunnel(struct link *l_ptr,
2361 return; 2337 return;
2362 } 2338 }
2363 msg_set_size(tunnel_hdr, length + INT_H_SIZE); 2339 msg_set_size(tunnel_hdr, length + INT_H_SIZE);
2364 buf = buf_acquire(length + INT_H_SIZE); 2340 buf = tipc_buf_acquire(length + INT_H_SIZE);
2365 if (!buf) { 2341 if (!buf) {
2366 warn("Link changeover error, " 2342 warn("Link changeover error, "
2367 "unable to send tunnel msg\n"); 2343 "unable to send tunnel msg\n");
@@ -2407,7 +2383,7 @@ void tipc_link_changeover(struct link *l_ptr)
2407 if (!l_ptr->first_out) { 2383 if (!l_ptr->first_out) {
2408 struct sk_buff *buf; 2384 struct sk_buff *buf;
2409 2385
2410 buf = buf_acquire(INT_H_SIZE); 2386 buf = tipc_buf_acquire(INT_H_SIZE);
2411 if (buf) { 2387 if (buf) {
2412 skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); 2388 skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
2413 msg_set_size(&tunnel_hdr, INT_H_SIZE); 2389 msg_set_size(&tunnel_hdr, INT_H_SIZE);
@@ -2468,7 +2444,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
2468 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */ 2444 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */
2469 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 2445 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
2470 msg_set_size(&tunnel_hdr, length + INT_H_SIZE); 2446 msg_set_size(&tunnel_hdr, length + INT_H_SIZE);
2471 outbuf = buf_acquire(length + INT_H_SIZE); 2447 outbuf = tipc_buf_acquire(length + INT_H_SIZE);
2472 if (outbuf == NULL) { 2448 if (outbuf == NULL) {
2473 warn("Link changeover error, " 2449 warn("Link changeover error, "
2474 "unable to send duplicate msg\n"); 2450 "unable to send duplicate msg\n");
@@ -2504,7 +2480,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
2504 u32 size = msg_size(msg); 2480 u32 size = msg_size(msg);
2505 struct sk_buff *eb; 2481 struct sk_buff *eb;
2506 2482
2507 eb = buf_acquire(size); 2483 eb = tipc_buf_acquire(size);
2508 if (eb) 2484 if (eb)
2509 skb_copy_to_linear_data(eb, msg, size); 2485 skb_copy_to_linear_data(eb, msg, size);
2510 return eb; 2486 return eb;
@@ -2632,11 +2608,11 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
2632 2608
2633 2609
2634/* 2610/*
2635 * tipc_link_send_long_buf: Entry for buffers needing fragmentation. 2611 * link_send_long_buf: Entry for buffers needing fragmentation.
 2636 * The buffer is complete, including the total message length. 2612 * The buffer is complete, including the total message length.
2637 * Returns user data length. 2613 * Returns user data length.
2638 */ 2614 */
2639int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) 2615static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2640{ 2616{
2641 struct tipc_msg *inmsg = buf_msg(buf); 2617 struct tipc_msg *inmsg = buf_msg(buf);
2642 struct tipc_msg fragm_hdr; 2618 struct tipc_msg fragm_hdr;
@@ -2675,7 +2651,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2675 fragm_sz = rest; 2651 fragm_sz = rest;
2676 msg_set_type(&fragm_hdr, LAST_FRAGMENT); 2652 msg_set_type(&fragm_hdr, LAST_FRAGMENT);
2677 } 2653 }
2678 fragm = buf_acquire(fragm_sz + INT_H_SIZE); 2654 fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
2679 if (fragm == NULL) { 2655 if (fragm == NULL) {
2680 warn("Link unable to fragment message\n"); 2656 warn("Link unable to fragment message\n");
2681 dsz = -ENOMEM; 2657 dsz = -ENOMEM;
@@ -2780,7 +2756,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2780 buf_discard(fbuf); 2756 buf_discard(fbuf);
2781 return 0; 2757 return 0;
2782 } 2758 }
2783 pbuf = buf_acquire(msg_size(imsg)); 2759 pbuf = tipc_buf_acquire(msg_size(imsg));
2784 if (pbuf != NULL) { 2760 if (pbuf != NULL) {
2785 pbuf->next = *pending; 2761 pbuf->next = *pending;
2786 *pending = pbuf; 2762 *pending = pbuf;
@@ -3174,44 +3150,6 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s
3174 return buf; 3150 return buf;
3175} 3151}
3176 3152
3177#if 0
3178int link_control(const char *name, u32 op, u32 val)
3179{
3180 int res = -EINVAL;
3181 struct link *l_ptr;
3182 u32 bearer_id;
3183 struct tipc_node * node;
3184 u32 a;
3185
3186 a = link_name2addr(name, &bearer_id);
3187 read_lock_bh(&tipc_net_lock);
3188 node = tipc_node_find(a);
3189 if (node) {
3190 tipc_node_lock(node);
3191 l_ptr = node->links[bearer_id];
3192 if (l_ptr) {
3193 if (op == TIPC_REMOVE_LINK) {
3194 struct bearer *b_ptr = l_ptr->b_ptr;
3195 spin_lock_bh(&b_ptr->publ.lock);
3196 tipc_link_delete(l_ptr);
3197 spin_unlock_bh(&b_ptr->publ.lock);
3198 }
3199 if (op == TIPC_CMD_BLOCK_LINK) {
3200 tipc_link_reset(l_ptr);
3201 l_ptr->blocked = 1;
3202 }
3203 if (op == TIPC_CMD_UNBLOCK_LINK) {
3204 l_ptr->blocked = 0;
3205 }
3206 res = 0;
3207 }
3208 tipc_node_unlock(node);
3209 }
3210 read_unlock_bh(&tipc_net_lock);
3211 return res;
3212}
3213#endif
3214
3215/** 3153/**
3216 * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination 3154 * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination
3217 * @dest: network address of destination node 3155 * @dest: network address of destination node
@@ -3242,28 +3180,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
3242 return res; 3180 return res;
3243} 3181}
3244 3182
3245#if 0
3246static void link_dump_rec_queue(struct link *l_ptr)
3247{
3248 struct sk_buff *crs;
3249
3250 if (!l_ptr->oldest_deferred_in) {
3251 info("Reception queue empty\n");
3252 return;
3253 }
3254 info("Contents of Reception queue:\n");
3255 crs = l_ptr->oldest_deferred_in;
3256 while (crs) {
3257 if (crs->data == (void *)0x0000a3a3) {
3258 info("buffer %x invalid\n", crs);
3259 return;
3260 }
3261 msg_dbg(buf_msg(crs), "In rec queue:\n");
3262 crs = crs->next;
3263 }
3264}
3265#endif
3266
3267static void link_dump_send_queue(struct link *l_ptr) 3183static void link_dump_send_queue(struct link *l_ptr)
3268{ 3184{
3269 if (l_ptr->next_out) { 3185 if (l_ptr->next_out) {
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 2e5385c47d30..f98bc613de67 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -210,10 +210,6 @@ struct link {
210 u32 msg_length_counts; 210 u32 msg_length_counts;
211 u32 msg_lengths_total; 211 u32 msg_lengths_total;
212 u32 msg_length_profile[7]; 212 u32 msg_length_profile[7];
213#if 0
214 u32 sent_tunneled;
215 u32 recv_tunneled;
216#endif
217 } stats; 213 } stats;
218 214
219 struct print_buf print_buf; 215 struct print_buf print_buf;
@@ -229,7 +225,6 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *dest);
229void tipc_link_reset_fragments(struct link *l_ptr); 225void tipc_link_reset_fragments(struct link *l_ptr);
230int tipc_link_is_up(struct link *l_ptr); 226int tipc_link_is_up(struct link *l_ptr);
231int tipc_link_is_active(struct link *l_ptr); 227int tipc_link_is_active(struct link *l_ptr);
232void tipc_link_start(struct link *l_ptr);
233u32 tipc_link_push_packet(struct link *l_ptr); 228u32 tipc_link_push_packet(struct link *l_ptr);
234void tipc_link_stop(struct link *l_ptr); 229void tipc_link_stop(struct link *l_ptr);
235struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); 230struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd);
@@ -243,9 +238,6 @@ int tipc_link_send_sections_fast(struct port* sender,
243 struct iovec const *msg_sect, 238 struct iovec const *msg_sect,
244 const u32 num_sect, 239 const u32 num_sect,
245 u32 destnode); 240 u32 destnode);
246int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf);
247void tipc_link_tunnel(struct link *l_ptr, struct tipc_msg *tnl_hdr,
248 struct tipc_msg *msg, u32 selector);
249void tipc_link_recv_bundle(struct sk_buff *buf); 241void tipc_link_recv_bundle(struct sk_buff *buf);
250int tipc_link_recv_fragment(struct sk_buff **pending, 242int tipc_link_recv_fragment(struct sk_buff **pending,
251 struct sk_buff **fb, 243 struct sk_buff **fb,
@@ -279,12 +271,12 @@ static inline int between(u32 lower, u32 upper, u32 n)
279 271
280static inline int less_eq(u32 left, u32 right) 272static inline int less_eq(u32 left, u32 right)
281{ 273{
282 return (mod(right - left) < 32768u); 274 return mod(right - left) < 32768u;
283} 275}
284 276
285static inline int less(u32 left, u32 right) 277static inline int less(u32 left, u32 right)
286{ 278{
287 return (less_eq(left, right) && (mod(right) != mod(left))); 279 return less_eq(left, right) && (mod(right) != mod(left));
288} 280}
289 281
290static inline u32 lesser(u32 left, u32 right) 282static inline u32 lesser(u32 left, u32 right)
@@ -299,32 +291,32 @@ static inline u32 lesser(u32 left, u32 right)
299 291
300static inline int link_working_working(struct link *l_ptr) 292static inline int link_working_working(struct link *l_ptr)
301{ 293{
302 return (l_ptr->state == WORKING_WORKING); 294 return l_ptr->state == WORKING_WORKING;
303} 295}
304 296
305static inline int link_working_unknown(struct link *l_ptr) 297static inline int link_working_unknown(struct link *l_ptr)
306{ 298{
307 return (l_ptr->state == WORKING_UNKNOWN); 299 return l_ptr->state == WORKING_UNKNOWN;
308} 300}
309 301
310static inline int link_reset_unknown(struct link *l_ptr) 302static inline int link_reset_unknown(struct link *l_ptr)
311{ 303{
312 return (l_ptr->state == RESET_UNKNOWN); 304 return l_ptr->state == RESET_UNKNOWN;
313} 305}
314 306
315static inline int link_reset_reset(struct link *l_ptr) 307static inline int link_reset_reset(struct link *l_ptr)
316{ 308{
317 return (l_ptr->state == RESET_RESET); 309 return l_ptr->state == RESET_RESET;
318} 310}
319 311
320static inline int link_blocked(struct link *l_ptr) 312static inline int link_blocked(struct link *l_ptr)
321{ 313{
322 return (l_ptr->exp_msg_count || l_ptr->blocked); 314 return l_ptr->exp_msg_count || l_ptr->blocked;
323} 315}
324 316
325static inline int link_congested(struct link *l_ptr) 317static inline int link_congested(struct link *l_ptr)
326{ 318{
327 return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]); 319 return l_ptr->out_queue_size >= l_ptr->queue_limit[0];
328} 320}
329 321
330#endif 322#endif
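
These helpers compare 16-bit link sequence numbers under wrap-around; dropping the redundant parentheses does not change that. A standalone sketch of the comparison (assuming mod() masks to 16 bits, as the other helpers in link.h do):

#include <assert.h>
#include <stdio.h>

typedef unsigned int u32;

/* TIPC link sequence numbers wrap at 16 bits; mod() masks accordingly
 * (assumption: matches the mod() helper used elsewhere in link.h). */
static u32 mod(u32 x) { return x & 0xffffu; }

static int less_eq(u32 left, u32 right)
{
	return mod(right - left) < 32768u;
}

static int less(u32 left, u32 right)
{
	return less_eq(left, right) && (mod(right) != mod(left));
}

int main(void)
{
	assert(less(5, 10));		/* ordinary ordering */
	assert(less(0xfffe, 0x0001));	/* ordering survives wrap-around */
	assert(!less(7, 7));		/* irreflexive */
	printf("sequence comparisons hold across the 16-bit wrap\n");
	return 0;
}
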
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 381063817b41..ecb532fb0351 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -112,7 +112,7 @@ int tipc_msg_build(struct tipc_msg *hdr,
112 return dsz; 112 return dsz;
113 } 113 }
114 114
115 *buf = buf_acquire(sz); 115 *buf = tipc_buf_acquire(sz);
116 if (!(*buf)) 116 if (!(*buf))
117 return -ENOMEM; 117 return -ENOMEM;
118 skb_copy_to_linear_data(*buf, hdr, hsz); 118 skb_copy_to_linear_data(*buf, hdr, hsz);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 995d2da35b01..031aad18efce 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -104,7 +104,7 @@ static inline u32 msg_user(struct tipc_msg *m)
104 104
105static inline u32 msg_isdata(struct tipc_msg *m) 105static inline u32 msg_isdata(struct tipc_msg *m)
106{ 106{
107 return (msg_user(m) <= TIPC_CRITICAL_IMPORTANCE); 107 return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE;
108} 108}
109 109
110static inline void msg_set_user(struct tipc_msg *m, u32 n) 110static inline void msg_set_user(struct tipc_msg *m, u32 n)
@@ -289,7 +289,7 @@ static inline void msg_set_destnode(struct tipc_msg *m, u32 a)
289 289
290static inline int msg_is_dest(struct tipc_msg *m, u32 d) 290static inline int msg_is_dest(struct tipc_msg *m, u32 d)
291{ 291{
292 return(msg_short(m) || (msg_destnode(m) == d)); 292 return msg_short(m) || (msg_destnode(m) == d);
293} 293}
294 294
295static inline u32 msg_routed(struct tipc_msg *m) 295static inline u32 msg_routed(struct tipc_msg *m)
@@ -632,7 +632,7 @@ static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
632 632
633static inline u32 msg_max_pkt(struct tipc_msg *m) 633static inline u32 msg_max_pkt(struct tipc_msg *m)
634{ 634{
635 return (msg_bits(m, 9, 16, 0xffff) * 4); 635 return msg_bits(m, 9, 16, 0xffff) * 4;
636} 636}
637 637
638static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) 638static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n)
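
msg_max_pkt() stores its value in 4-byte units inside header word 9. A user-space model of the extraction (the shift-and-mask behaviour of msg_bits() is an assumption based on its use here; it is not shown in this patch):

#include <arpa/inet.h>
#include <stdio.h>

typedef unsigned int u32;

/* Minimal stand-in for struct tipc_msg: header words kept big-endian. */
struct tipc_msg { u32 hdr[11]; };

/* Assumed semantics: take word w, shift right by pos, apply mask. */
static u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask)
{
	return (ntohl(m->hdr[w]) >> pos) & mask;
}

static u32 msg_max_pkt(struct tipc_msg *m)
{
	return msg_bits(m, 9, 16, 0xffff) * 4;	/* stored in 4-byte units */
}

int main(void)
{
	struct tipc_msg m = { .hdr = {0} };

	m.hdr[9] = htonl(365u << 16);		/* 365 * 4 = 1460 bytes */
	printf("max_pkt = %u\n", msg_max_pkt(&m));
	return 0;
}
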
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 6ac3c543250b..7b907171f879 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -98,7 +98,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
98 98
99static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) 99static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
100{ 100{
101 struct sk_buff *buf = buf_acquire(LONG_H_SIZE + size); 101 struct sk_buff *buf = tipc_buf_acquire(LONG_H_SIZE + size);
102 struct tipc_msg *msg; 102 struct tipc_msg *msg;
103 103
104 if (buf != NULL) { 104 if (buf != NULL) {
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 8ba79620db3f..3a8de4334da1 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -116,7 +116,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock);
116 116
117static int hash(int x) 117static int hash(int x)
118{ 118{
119 return(x & (tipc_nametbl_size - 1)); 119 return x & (tipc_nametbl_size - 1);
120} 120}
121 121
122/** 122/**
@@ -613,8 +613,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
613} 613}
614 614
615/* 615/*
616 * tipc_nametbl_translate(): Translate tipc_name -> tipc_portid. 616 * tipc_nametbl_translate - translate name to port id
617 * Very time-critical.
618 * 617 *
619 * Note: on entry 'destnode' is the search domain used during translation; 618 * Note: on entry 'destnode' is the search domain used during translation;
620 * on exit it passes back the node address of the matching port (if any) 619 * on exit it passes back the node address of the matching port (if any)
@@ -685,7 +684,6 @@ found:
685 } 684 }
686 spin_unlock_bh(&seq->lock); 685 spin_unlock_bh(&seq->lock);
687not_found: 686not_found:
688 *destnode = 0;
689 read_unlock_bh(&tipc_nametbl_lock); 687 read_unlock_bh(&tipc_nametbl_lock);
690 return 0; 688 return 0;
691} 689}
@@ -877,7 +875,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
877 u32 index) 875 u32 index)
878{ 876{
879 char portIdStr[27]; 877 char portIdStr[27];
880 char *scopeStr; 878 const char *scope_str[] = {"", " zone", " cluster", " node"};
881 struct publication *publ = sseq->zone_list; 879 struct publication *publ = sseq->zone_list;
882 880
883 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper); 881 tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper);
@@ -893,15 +891,8 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
893 tipc_node(publ->node), publ->ref); 891 tipc_node(publ->node), publ->ref);
894 tipc_printf(buf, "%-26s ", portIdStr); 892 tipc_printf(buf, "%-26s ", portIdStr);
895 if (depth > 3) { 893 if (depth > 3) {
896 if (publ->node != tipc_own_addr) 894 tipc_printf(buf, "%-10u %s", publ->key,
897 scopeStr = ""; 895 scope_str[publ->scope]);
898 else if (publ->scope == TIPC_NODE_SCOPE)
899 scopeStr = "node";
900 else if (publ->scope == TIPC_CLUSTER_SCOPE)
901 scopeStr = "cluster";
902 else
903 scopeStr = "zone";
904 tipc_printf(buf, "%-10u %s", publ->key, scopeStr);
905 } 896 }
906 897
907 publ = publ->zone_list_next; 898 publ = publ->zone_list_next;
@@ -951,24 +942,19 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
951 942
952static void nametbl_header(struct print_buf *buf, u32 depth) 943static void nametbl_header(struct print_buf *buf, u32 depth)
953{ 944{
954 tipc_printf(buf, "Type "); 945 const char *header[] = {
955 946 "Type ",
956 if (depth > 1) 947 "Lower Upper ",
957 tipc_printf(buf, "Lower Upper "); 948 "Port Identity ",
958 if (depth > 2) 949 "Publication Scope"
959 tipc_printf(buf, "Port Identity "); 950 };
960 if (depth > 3) 951
961 tipc_printf(buf, "Publication"); 952 int i;
962 953
963 tipc_printf(buf, "\n-----------"); 954 if (depth > 4)
964 955 depth = 4;
965 if (depth > 1) 956 for (i = 0; i < depth; i++)
966 tipc_printf(buf, "--------------------- "); 957 tipc_printf(buf, header[i]);
967 if (depth > 2)
968 tipc_printf(buf, "-------------------------- ");
969 if (depth > 3)
970 tipc_printf(buf, "------------------");
971
972 tipc_printf(buf, "\n"); 958 tipc_printf(buf, "\n");
973} 959}
974 960
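
The rewritten nametbl_header() replaces the cascaded if-chains with a table indexed by the display depth. A user-space rendering of the same idea (column widths approximate those in the hunk):

#include <stdio.h>

typedef unsigned int u32;

/* The first 'depth' column headings come from a table instead of
 * repeated depth checks; depth is clamped to the table size. */
static void nametbl_header(u32 depth)
{
	const char *header[] = {
		"Type       ",
		"Lower      Upper      ",
		"Port Identity              ",
		"Publication Scope"
	};
	u32 i;

	if (depth > 4)
		depth = 4;
	for (i = 0; i < depth; i++)
		fputs(header[i], stdout);
	putchar('\n');
}

int main(void)
{
	nametbl_header(2);	/* first two headings only */
	nametbl_header(4);	/* all four headings */
	return 0;
}
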
@@ -1023,16 +1009,6 @@ static void nametbl_list(struct print_buf *buf, u32 depth_info,
1023 } 1009 }
1024} 1010}
1025 1011
1026#if 0
1027void tipc_nametbl_print(struct print_buf *buf, const char *str)
1028{
1029 tipc_printf(buf, str);
1030 read_lock_bh(&tipc_nametbl_lock);
1031 nametbl_list(buf, 0, 0, 0, 0);
1032 read_unlock_bh(&tipc_nametbl_lock);
1033}
1034#endif
1035
1036#define MAX_NAME_TBL_QUERY 32768 1012#define MAX_NAME_TBL_QUERY 32768
1037 1013
1038struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) 1014struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
@@ -1065,13 +1041,6 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
1065 return buf; 1041 return buf;
1066} 1042}
1067 1043
1068#if 0
1069void tipc_nametbl_dump(void)
1070{
1071 nametbl_list(TIPC_CONS, 0, 0, 0, 0);
1072}
1073#endif
1074
1075int tipc_nametbl_init(void) 1044int tipc_nametbl_init(void)
1076{ 1045{
1077 table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head), 1046 table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head),
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f61b7694138b..1a621cfd6604 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -129,15 +129,6 @@ u32 tipc_net_select_router(u32 addr, u32 ref)
129 return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref); 129 return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref);
130} 130}
131 131
132#if 0
133u32 tipc_net_next_node(u32 a)
134{
135 if (tipc_net.zones[tipc_zone(a)])
136 return tipc_zone_next_node(a);
137 return 0;
138}
139#endif
140
141void tipc_net_remove_as_router(u32 router) 132void tipc_net_remove_as_router(u32 router)
142{ 133{
143 u32 z_num; 134 u32 z_num;
@@ -248,6 +239,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
248 239
249 /* Handle message for another node */ 240 /* Handle message for another node */
250 msg_dbg(msg, "NET>SEND>: "); 241 msg_dbg(msg, "NET>SEND>: ");
242 skb_trim(buf, msg_size(msg));
251 tipc_link_send(buf, dnode, msg_link_selector(msg)); 243 tipc_link_send(buf, dnode, msg_link_selector(msg));
252} 244}
253 245
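
With the trim moved out of the Ethernet receive path, tipc_net_route_msg() now cuts each forwarded buffer back to the size declared in its TIPC header, so driver padding never reaches the outgoing link. A toy model of that invariant (struct and sizes are illustrative, not kernel types):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for an skb: the Ethernet driver may deliver trailing
 * padding, so the buffer can be longer than the TIPC-declared size. */
struct toy_skb {
	unsigned char data[128];
	size_t len;
};

static void skb_trim(struct toy_skb *skb, size_t len)
{
	if (skb->len > len)
		skb->len = len;
}

int main(void)
{
	struct toy_skb skb = { .len = 64 };	/* 60-byte msg + 4 pad */
	size_t msg_size = 60;			/* size field from header */

	skb_trim(&skb, msg_size);	/* as tipc_net_route_msg now does */
	assert(skb.len == msg_size);
	printf("forwarding %zu bytes, padding dropped\n", skb.len);
	return 0;
}
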
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b634942caba5..b4d87eb2dc5d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -50,7 +50,8 @@ void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
50static void node_lost_contact(struct tipc_node *n_ptr); 50static void node_lost_contact(struct tipc_node *n_ptr);
51static void node_established_contact(struct tipc_node *n_ptr); 51static void node_established_contact(struct tipc_node *n_ptr);
52 52
53struct tipc_node *tipc_nodes = NULL; /* sorted list of nodes within cluster */ 53/* sorted list of nodes within cluster */
54static struct tipc_node *tipc_nodes = NULL;
54 55
55static DEFINE_SPINLOCK(node_create_lock); 56static DEFINE_SPINLOCK(node_create_lock);
56 57
@@ -125,16 +126,6 @@ void tipc_node_delete(struct tipc_node *n_ptr)
125 if (!n_ptr) 126 if (!n_ptr)
126 return; 127 return;
127 128
128#if 0
129 /* Not needed because links are already deleted via tipc_bearer_stop() */
130
131 u32 l_num;
132
133 for (l_num = 0; l_num < MAX_BEARERS; l_num++) {
134 link_delete(n_ptr->links[l_num]);
135 }
136#endif
137
138 dbg("node %x deleted\n", n_ptr->addr); 129 dbg("node %x deleted\n", n_ptr->addr);
139 kfree(n_ptr); 130 kfree(n_ptr);
140} 131}
@@ -237,23 +228,22 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
237 228
238int tipc_node_has_active_links(struct tipc_node *n_ptr) 229int tipc_node_has_active_links(struct tipc_node *n_ptr)
239{ 230{
240 return (n_ptr && 231 return n_ptr->active_links[0] != NULL;
241 ((n_ptr->active_links[0]) || (n_ptr->active_links[1])));
242} 232}
243 233
244int tipc_node_has_redundant_links(struct tipc_node *n_ptr) 234int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
245{ 235{
246 return (n_ptr->working_links > 1); 236 return n_ptr->working_links > 1;
247} 237}
248 238
249static int tipc_node_has_active_routes(struct tipc_node *n_ptr) 239static int tipc_node_has_active_routes(struct tipc_node *n_ptr)
250{ 240{
251 return (n_ptr && (n_ptr->last_router >= 0)); 241 return n_ptr && (n_ptr->last_router >= 0);
252} 242}
253 243
254int tipc_node_is_up(struct tipc_node *n_ptr) 244int tipc_node_is_up(struct tipc_node *n_ptr)
255{ 245{
256 return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr)); 246 return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr);
257} 247}
258 248
259struct tipc_node *tipc_node_attach_link(struct link *l_ptr) 249struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
@@ -384,6 +374,20 @@ static void node_established_contact(struct tipc_node *n_ptr)
384 tipc_highest_allowed_slave); 374 tipc_highest_allowed_slave);
385} 375}
386 376
377static void node_cleanup_finished(unsigned long node_addr)
378{
379 struct tipc_node *n_ptr;
380
381 read_lock_bh(&tipc_net_lock);
382 n_ptr = tipc_node_find(node_addr);
383 if (n_ptr) {
384 tipc_node_lock(n_ptr);
385 n_ptr->cleanup_required = 0;
386 tipc_node_unlock(n_ptr);
387 }
388 read_unlock_bh(&tipc_net_lock);
389}
390
387static void node_lost_contact(struct tipc_node *n_ptr) 391static void node_lost_contact(struct tipc_node *n_ptr)
388{ 392{
389 struct cluster *c_ptr; 393 struct cluster *c_ptr;
@@ -458,6 +462,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
458 tipc_k_signal((Handler)ns->handle_node_down, 462 tipc_k_signal((Handler)ns->handle_node_down,
459 (unsigned long)ns->usr_handle); 463 (unsigned long)ns->usr_handle);
460 } 464 }
465
466 /* Prevent re-contact with node until all cleanup is done */
467
468 n_ptr->cleanup_required = 1;
469 tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
461} 470}
462 471
463/** 472/**
@@ -579,38 +588,6 @@ void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router)
579 node_lost_contact(n_ptr); 588 node_lost_contact(n_ptr);
580} 589}
581 590
582#if 0
583void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str)
584{
585 u32 i;
586
587 tipc_printf(buf, "\n\n%s", str);
588 for (i = 0; i < MAX_BEARERS; i++) {
589 if (!n_ptr->links[i])
590 continue;
591 tipc_printf(buf, "Links[%u]: %x, ", i, n_ptr->links[i]);
592 }
593 tipc_printf(buf, "Active links: [%x,%x]\n",
594 n_ptr->active_links[0], n_ptr->active_links[1]);
595}
596#endif
597
598u32 tipc_available_nodes(const u32 domain)
599{
600 struct tipc_node *n_ptr;
601 u32 cnt = 0;
602
603 read_lock_bh(&tipc_net_lock);
604 for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
605 if (!tipc_in_scope(domain, n_ptr->addr))
606 continue;
607 if (tipc_node_is_up(n_ptr))
608 cnt++;
609 }
610 read_unlock_bh(&tipc_net_lock);
611 return cnt;
612}
613
614struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) 591struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
615{ 592{
616 u32 domain; 593 u32 domain;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 6f990da5d143..fff331b2d26c 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -52,6 +52,7 @@
52 * @active_links: pointers to active links to node 52 * @active_links: pointers to active links to node
53 * @links: pointers to all links to node 53 * @links: pointers to all links to node
54 * @working_links: number of working links to node (both active and standby) 54 * @working_links: number of working links to node (both active and standby)
55 * @cleanup_required: non-zero if cleaning up after a prior loss of contact
55 * @link_cnt: number of links to node 56 * @link_cnt: number of links to node
56 * @permit_changeover: non-zero if node has redundant links to this system 57 * @permit_changeover: non-zero if node has redundant links to this system
57 * @routers: bitmap (used for multicluster communication) 58 * @routers: bitmap (used for multicluster communication)
@@ -78,6 +79,7 @@ struct tipc_node {
78 struct link *links[MAX_BEARERS]; 79 struct link *links[MAX_BEARERS];
79 int link_cnt; 80 int link_cnt;
80 int working_links; 81 int working_links;
82 int cleanup_required;
81 int permit_changeover; 83 int permit_changeover;
82 u32 routers[512/32]; 84 u32 routers[512/32];
83 int last_router; 85 int last_router;
@@ -94,7 +96,6 @@ struct tipc_node {
94 } bclink; 96 } bclink;
95}; 97};
96 98
97extern struct tipc_node *tipc_nodes;
98extern u32 tipc_own_tag; 99extern u32 tipc_own_tag;
99 100
100struct tipc_node *tipc_node_create(u32 addr); 101struct tipc_node *tipc_node_create(u32 addr);
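
Taken together, the node.c and node.h hunks add a simple handshake: losing contact sets cleanup_required, receive processing drops traffic from that neighbor while the flag is set, and a deferred handler clears it once cleanup has run. A user-space model of the flag's lifecycle (the deferred tipc_k_signal() call is reduced to a direct call, and locking is omitted):

#include <stdio.h>

struct tipc_node_model { int cleanup_required; };

/* In the patch this runs later, from the signal handler queue. */
static void node_cleanup_finished(struct tipc_node_model *n)
{
	n->cleanup_required = 0;	/* safe to talk to neighbor again */
}

/* tipc_recv_msg() now refuses traffic while cleanup is pending. */
static int accept_msg(struct tipc_node_model *n)
{
	return !n->cleanup_required;
}

int main(void)
{
	struct tipc_node_model n = { 0 };

	n.cleanup_required = 1;		/* node_lost_contact() */
	printf("during cleanup: accept=%d\n", accept_msg(&n));	/* 0 */
	node_cleanup_finished(&n);	/* deferred handler fires */
	printf("after cleanup:  accept=%d\n", accept_msg(&n));	/* 1 */
	return 0;
}
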
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 0737680e9266..82092eaa1536 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -293,34 +293,6 @@ int tipc_deleteport(u32 ref)
293 return 0; 293 return 0;
294} 294}
295 295
296/**
297 * tipc_get_port() - return port associated with 'ref'
298 *
299 * Note: Port is not locked.
300 */
301
302struct tipc_port *tipc_get_port(const u32 ref)
303{
304 return (struct tipc_port *)tipc_ref_deref(ref);
305}
306
307/**
308 * tipc_get_handle - return user handle associated to port 'ref'
309 */
310
311void *tipc_get_handle(const u32 ref)
312{
313 struct port *p_ptr;
314 void * handle;
315
316 p_ptr = tipc_port_lock(ref);
317 if (!p_ptr)
318 return NULL;
319 handle = p_ptr->publ.usr_handle;
320 tipc_port_unlock(p_ptr);
321 return handle;
322}
323
324static int port_unreliable(struct port *p_ptr) 296static int port_unreliable(struct port *p_ptr)
325{ 297{
326 return msg_src_droppable(&p_ptr->publ.phdr); 298 return msg_src_droppable(&p_ptr->publ.phdr);
@@ -392,7 +364,7 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
392 struct sk_buff *buf; 364 struct sk_buff *buf;
393 struct tipc_msg *msg; 365 struct tipc_msg *msg;
394 366
395 buf = buf_acquire(LONG_H_SIZE); 367 buf = tipc_buf_acquire(LONG_H_SIZE);
396 if (buf) { 368 if (buf) {
397 msg = buf_msg(buf); 369 msg = buf_msg(buf);
398 tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode); 370 tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode);
@@ -433,7 +405,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
433 hdr_sz = MCAST_H_SIZE; 405 hdr_sz = MCAST_H_SIZE;
434 else 406 else
435 hdr_sz = LONG_H_SIZE; 407 hdr_sz = LONG_H_SIZE;
436 rbuf = buf_acquire(data_sz + hdr_sz); 408 rbuf = tipc_buf_acquire(data_sz + hdr_sz);
437 if (rbuf == NULL) { 409 if (rbuf == NULL) {
438 buf_discard(buf); 410 buf_discard(buf);
439 return data_sz; 411 return data_sz;
@@ -588,19 +560,10 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
588 if (!p_ptr) { 560 if (!p_ptr) {
589 err = TIPC_ERR_NO_PORT; 561 err = TIPC_ERR_NO_PORT;
590 } else if (p_ptr->publ.connected) { 562 } else if (p_ptr->publ.connected) {
591 if (port_peernode(p_ptr) != msg_orignode(msg)) 563 if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
564 (port_peerport(p_ptr) != msg_origport(msg))) {
592 err = TIPC_ERR_NO_PORT; 565 err = TIPC_ERR_NO_PORT;
593 if (port_peerport(p_ptr) != msg_origport(msg)) 566 } else if (msg_type(msg) == CONN_ACK) {
594 err = TIPC_ERR_NO_PORT;
595 if (!err && msg_routed(msg)) {
596 u32 seqno = msg_transp_seqno(msg);
597 u32 myno = ++p_ptr->last_in_seqno;
598 if (seqno != myno) {
599 err = TIPC_ERR_NO_PORT;
600 abort_buf = port_build_self_abort_msg(p_ptr, err);
601 }
602 }
603 if (msg_type(msg) == CONN_ACK) {
604 int wakeup = tipc_port_congested(p_ptr) && 567 int wakeup = tipc_port_congested(p_ptr) &&
605 p_ptr->publ.congested && 568 p_ptr->publ.congested &&
606 p_ptr->wakeup; 569 p_ptr->wakeup;
@@ -719,50 +682,6 @@ struct sk_buff *tipc_port_get_ports(void)
719 return buf; 682 return buf;
720} 683}
721 684
722#if 0
723
724#define MAX_PORT_STATS 2000
725
726struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space)
727{
728 u32 ref;
729 struct port *p_ptr;
730 struct sk_buff *buf;
731 struct tlv_desc *rep_tlv;
732 struct print_buf pb;
733 int str_len;
734
735 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_PORT_REF))
736 return cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
737
738 ref = *(u32 *)TLV_DATA(req_tlv_area);
739 ref = ntohl(ref);
740
741 p_ptr = tipc_port_lock(ref);
742 if (!p_ptr)
743 return cfg_reply_error_string("port not found");
744
745 buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_STATS));
746 if (!buf) {
747 tipc_port_unlock(p_ptr);
748 return NULL;
749 }
750 rep_tlv = (struct tlv_desc *)buf->data;
751
752 tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_STATS);
753 port_print(p_ptr, &pb, 1);
754 /* NEED TO FILL IN ADDITIONAL PORT STATISTICS HERE */
755 tipc_port_unlock(p_ptr);
756 str_len = tipc_printbuf_validate(&pb);
757
758 skb_put(buf, TLV_SPACE(str_len));
759 TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
760
761 return buf;
762}
763
764#endif
765
766void tipc_port_reinit(void) 685void tipc_port_reinit(void)
767{ 686{
768 struct port *p_ptr; 687 struct port *p_ptr;
@@ -1295,50 +1214,13 @@ int tipc_shutdown(u32 ref)
1295 return tipc_disconnect(ref); 1214 return tipc_disconnect(ref);
1296} 1215}
1297 1216
1298int tipc_isconnected(u32 ref, int *isconnected)
1299{
1300 struct port *p_ptr;
1301
1302 p_ptr = tipc_port_lock(ref);
1303 if (!p_ptr)
1304 return -EINVAL;
1305 *isconnected = p_ptr->publ.connected;
1306 tipc_port_unlock(p_ptr);
1307 return 0;
1308}
1309
1310int tipc_peer(u32 ref, struct tipc_portid *peer)
1311{
1312 struct port *p_ptr;
1313 int res;
1314
1315 p_ptr = tipc_port_lock(ref);
1316 if (!p_ptr)
1317 return -EINVAL;
1318 if (p_ptr->publ.connected) {
1319 peer->ref = port_peerport(p_ptr);
1320 peer->node = port_peernode(p_ptr);
1321 res = 0;
1322 } else
1323 res = -ENOTCONN;
1324 tipc_port_unlock(p_ptr);
1325 return res;
1326}
1327
1328int tipc_ref_valid(u32 ref)
1329{
1330 /* Works irrespective of type */
1331 return !!tipc_ref_deref(ref);
1332}
1333
1334
1335/* 1217/*
1336 * tipc_port_recv_sections(): Concatenate and deliver sectioned 1218 * tipc_port_recv_sections(): Concatenate and deliver sectioned
1337 * message for this node. 1219 * message for this node.
1338 */ 1220 */
1339 1221
1340int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, 1222static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
1341 struct iovec const *msg_sect) 1223 struct iovec const *msg_sect)
1342{ 1224{
1343 struct sk_buff *buf; 1225 struct sk_buff *buf;
1344 int res; 1226 int res;
@@ -1389,65 +1271,16 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
1389} 1271}
1390 1272
1391/** 1273/**
1392 * tipc_send_buf - send message buffer on connection
1393 */
1394
1395int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
1396{
1397 struct port *p_ptr;
1398 struct tipc_msg *msg;
1399 u32 destnode;
1400 u32 hsz;
1401 u32 sz;
1402 u32 res;
1403
1404 p_ptr = tipc_port_deref(ref);
1405 if (!p_ptr || !p_ptr->publ.connected)
1406 return -EINVAL;
1407
1408 msg = &p_ptr->publ.phdr;
1409 hsz = msg_hdr_sz(msg);
1410 sz = hsz + dsz;
1411 msg_set_size(msg, sz);
1412 if (skb_cow(buf, hsz))
1413 return -ENOMEM;
1414
1415 skb_push(buf, hsz);
1416 skb_copy_to_linear_data(buf, msg, hsz);
1417 destnode = msg_destnode(msg);
1418 p_ptr->publ.congested = 1;
1419 if (!tipc_port_congested(p_ptr)) {
1420 if (likely(destnode != tipc_own_addr))
1421 res = tipc_send_buf_fast(buf, destnode);
1422 else {
1423 tipc_port_recv_msg(buf);
1424 res = sz;
1425 }
1426 if (likely(res != -ELINKCONG)) {
1427 port_incr_out_seqno(p_ptr);
1428 p_ptr->sent++;
1429 p_ptr->publ.congested = 0;
1430 return res;
1431 }
1432 }
1433 if (port_unreliable(p_ptr)) {
1434 p_ptr->publ.congested = 0;
1435 return dsz;
1436 }
1437 return -ELINKCONG;
1438}
1439
1440/**
1441 * tipc_forward2name - forward message sections to port name 1274 * tipc_forward2name - forward message sections to port name
1442 */ 1275 */
1443 1276
1444int tipc_forward2name(u32 ref, 1277static int tipc_forward2name(u32 ref,
1445 struct tipc_name const *name, 1278 struct tipc_name const *name,
1446 u32 domain, 1279 u32 domain,
1447 u32 num_sect, 1280 u32 num_sect,
1448 struct iovec const *msg_sect, 1281 struct iovec const *msg_sect,
1449 struct tipc_portid const *orig, 1282 struct tipc_portid const *orig,
1450 unsigned int importance) 1283 unsigned int importance)
1451{ 1284{
1452 struct port *p_ptr; 1285 struct port *p_ptr;
1453 struct tipc_msg *msg; 1286 struct tipc_msg *msg;
@@ -1473,7 +1306,7 @@ int tipc_forward2name(u32 ref,
1473 msg_set_destnode(msg, destnode); 1306 msg_set_destnode(msg, destnode);
1474 msg_set_destport(msg, destport); 1307 msg_set_destport(msg, destport);
1475 1308
1476 if (likely(destport || destnode)) { 1309 if (likely(destport)) {
1477 p_ptr->sent++; 1310 p_ptr->sent++;
1478 if (likely(destnode == tipc_own_addr)) 1311 if (likely(destnode == tipc_own_addr))
1479 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1312 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
@@ -1510,89 +1343,15 @@ int tipc_send2name(u32 ref,
1510} 1343}
1511 1344
1512/** 1345/**
1513 * tipc_forward_buf2name - forward message buffer to port name
1514 */
1515
1516int tipc_forward_buf2name(u32 ref,
1517 struct tipc_name const *name,
1518 u32 domain,
1519 struct sk_buff *buf,
1520 unsigned int dsz,
1521 struct tipc_portid const *orig,
1522 unsigned int importance)
1523{
1524 struct port *p_ptr;
1525 struct tipc_msg *msg;
1526 u32 destnode = domain;
1527 u32 destport;
1528 int res;
1529
1530 p_ptr = (struct port *)tipc_ref_deref(ref);
1531 if (!p_ptr || p_ptr->publ.connected)
1532 return -EINVAL;
1533
1534 msg = &p_ptr->publ.phdr;
1535 if (importance <= TIPC_CRITICAL_IMPORTANCE)
1536 msg_set_importance(msg, importance);
1537 msg_set_type(msg, TIPC_NAMED_MSG);
1538 msg_set_orignode(msg, orig->node);
1539 msg_set_origport(msg, orig->ref);
1540 msg_set_nametype(msg, name->type);
1541 msg_set_nameinst(msg, name->instance);
1542 msg_set_lookup_scope(msg, tipc_addr_scope(domain));
1543 msg_set_hdr_sz(msg, LONG_H_SIZE);
1544 msg_set_size(msg, LONG_H_SIZE + dsz);
1545 destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
1546 msg_set_destnode(msg, destnode);
1547 msg_set_destport(msg, destport);
1548 msg_dbg(msg, "forw2name ==> ");
1549 if (skb_cow(buf, LONG_H_SIZE))
1550 return -ENOMEM;
1551 skb_push(buf, LONG_H_SIZE);
1552 skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
1553 msg_dbg(buf_msg(buf),"PREP:");
1554 if (likely(destport || destnode)) {
1555 p_ptr->sent++;
1556 if (destnode == tipc_own_addr)
1557 return tipc_port_recv_msg(buf);
1558 res = tipc_send_buf_fast(buf, destnode);
1559 if (likely(res != -ELINKCONG))
1560 return res;
1561 if (port_unreliable(p_ptr))
1562 return dsz;
1563 return -ELINKCONG;
1564 }
1565 return tipc_reject_msg(buf, TIPC_ERR_NO_NAME);
1566}
1567
1568/**
1569 * tipc_send_buf2name - send message buffer to port name
1570 */
1571
1572int tipc_send_buf2name(u32 ref,
1573 struct tipc_name const *dest,
1574 u32 domain,
1575 struct sk_buff *buf,
1576 unsigned int dsz)
1577{
1578 struct tipc_portid orig;
1579
1580 orig.ref = ref;
1581 orig.node = tipc_own_addr;
1582 return tipc_forward_buf2name(ref, dest, domain, buf, dsz, &orig,
1583 TIPC_PORT_IMPORTANCE);
1584}
1585
1586/**
1587 * tipc_forward2port - forward message sections to port identity 1346 * tipc_forward2port - forward message sections to port identity
1588 */ 1347 */
1589 1348
1590int tipc_forward2port(u32 ref, 1349static int tipc_forward2port(u32 ref,
1591 struct tipc_portid const *dest, 1350 struct tipc_portid const *dest,
1592 unsigned int num_sect, 1351 unsigned int num_sect,
1593 struct iovec const *msg_sect, 1352 struct iovec const *msg_sect,
1594 struct tipc_portid const *orig, 1353 struct tipc_portid const *orig,
1595 unsigned int importance) 1354 unsigned int importance)
1596{ 1355{
1597 struct port *p_ptr; 1356 struct port *p_ptr;
1598 struct tipc_msg *msg; 1357 struct tipc_msg *msg;
@@ -1644,12 +1403,12 @@ int tipc_send2port(u32 ref,
1644/** 1403/**
1645 * tipc_forward_buf2port - forward message buffer to port identity 1404 * tipc_forward_buf2port - forward message buffer to port identity
1646 */ 1405 */
1647int tipc_forward_buf2port(u32 ref, 1406static int tipc_forward_buf2port(u32 ref,
1648 struct tipc_portid const *dest, 1407 struct tipc_portid const *dest,
1649 struct sk_buff *buf, 1408 struct sk_buff *buf,
1650 unsigned int dsz, 1409 unsigned int dsz,
1651 struct tipc_portid const *orig, 1410 struct tipc_portid const *orig,
1652 unsigned int importance) 1411 unsigned int importance)
1653{ 1412{
1654 struct port *p_ptr; 1413 struct port *p_ptr;
1655 struct tipc_msg *msg; 1414 struct tipc_msg *msg;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8d1652aab298..73bbf442b346 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -109,8 +109,6 @@ struct port {
109extern spinlock_t tipc_port_list_lock; 109extern spinlock_t tipc_port_list_lock;
110struct port_list; 110struct port_list;
111 111
112int tipc_port_recv_sections(struct port *p_ptr, u32 num_sect,
113 struct iovec const *msg_sect);
114int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, 112int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
115 struct iovec const *msg_sect, u32 num_sect, 113 struct iovec const *msg_sect, u32 num_sect,
116 int err); 114 int err);
@@ -157,7 +155,7 @@ static inline u32 tipc_peer_node(struct port *p_ptr)
157 155
158static inline int tipc_port_congested(struct port *p_ptr) 156static inline int tipc_port_congested(struct port *p_ptr)
159{ 157{
160 return((p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2)); 158 return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
161} 159}
162 160
163/** 161/**
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 8dea66500cf5..ab8ad32d8c20 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -282,23 +282,6 @@ void *tipc_ref_lock(u32 ref)
282 return NULL; 282 return NULL;
283} 283}
284 284
285/**
286 * tipc_ref_unlock - unlock referenced object
287 */
288
289void tipc_ref_unlock(u32 ref)
290{
291 if (likely(tipc_ref_table.entries)) {
292 struct reference *entry;
293
294 entry = &tipc_ref_table.entries[ref &
295 tipc_ref_table.index_mask];
296 if (likely((entry->ref == ref) && (entry->object)))
297 spin_unlock_bh(&entry->lock);
298 else
299 err("Attempt to unlock non-existent reference\n");
300 }
301}
302 285
303/** 286/**
304 * tipc_ref_deref - return pointer referenced object (without locking it) 287 * tipc_ref_deref - return pointer referenced object (without locking it)
diff --git a/net/tipc/ref.h b/net/tipc/ref.h
index 7e3798ea93b9..5bc8e7ab84de 100644
--- a/net/tipc/ref.h
+++ b/net/tipc/ref.h
@@ -44,7 +44,6 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock);
44void tipc_ref_discard(u32 ref); 44void tipc_ref_discard(u32 ref);
45 45
46void *tipc_ref_lock(u32 ref); 46void *tipc_ref_lock(u32 ref);
47void tipc_ref_unlock(u32 ref);
48void *tipc_ref_deref(u32 ref); 47void *tipc_ref_deref(u32 ref);
49 48
50#endif 49#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 66e889ba48fd..e9f0d5004483 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -64,6 +64,7 @@ struct tipc_sock {
64 struct sock sk; 64 struct sock sk;
65 struct tipc_port *p; 65 struct tipc_port *p;
66 struct tipc_portid peer_name; 66 struct tipc_portid peer_name;
67 long conn_timeout;
67}; 68};
68 69
69#define tipc_sk(sk) ((struct tipc_sock *)(sk)) 70#define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -240,9 +241,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
240 sock->state = state; 241 sock->state = state;
241 242
242 sock_init_data(sock, sk); 243 sock_init_data(sock, sk);
243 sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
244 sk->sk_backlog_rcv = backlog_rcv; 244 sk->sk_backlog_rcv = backlog_rcv;
245 tipc_sk(sk)->p = tp_ptr; 245 tipc_sk(sk)->p = tp_ptr;
246 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
246 247
247 spin_unlock_bh(tp_ptr->lock); 248 spin_unlock_bh(tp_ptr->lock);
248 249
@@ -395,6 +396,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
395 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 396 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
396 struct tipc_sock *tsock = tipc_sk(sock->sk); 397 struct tipc_sock *tsock = tipc_sk(sock->sk);
397 398
399 memset(addr, 0, sizeof(*addr));
398 if (peer) { 400 if (peer) {
399 if ((sock->state != SS_CONNECTED) && 401 if ((sock->state != SS_CONNECTED) &&
400 ((peer != 2) || (sock->state != SS_DISCONNECTING))) 402 ((peer != 2) || (sock->state != SS_DISCONNECTING)))
@@ -429,36 +431,55 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
429 * to handle any preventable race conditions, so TIPC will do the same ... 431 * to handle any preventable race conditions, so TIPC will do the same ...
430 * 432 *
431 * TIPC sets the returned events as follows: 433 * TIPC sets the returned events as follows:
432 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty 434 *
433 * or if a connection-oriented socket is does not have an active connection 435 * socket state flags set
434 * (i.e. a read operation will not block). 436 * ------------ ---------
435 * b) POLLOUT is set except when a socket's connection has been terminated 437 * unconnected no read flags
436 * (i.e. a write operation will not block). 438 * no write flags
437 * c) POLLHUP is set when a socket's connection has been terminated. 439 *
438 * 440 * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue
439 * IMPORTANT: The fact that a read or write operation will not block does NOT 441 * no write flags
440 * imply that the operation will succeed! 442 *
443 * connected POLLIN/POLLRDNORM if data in rx queue
444 * POLLOUT if port is not congested
445 *
446 * disconnecting POLLIN/POLLRDNORM/POLLHUP
447 * no write flags
448 *
449 * listening POLLIN if SYN in rx queue
450 * no write flags
451 *
452 * ready POLLIN/POLLRDNORM if data in rx queue
453 * [connectionless] POLLOUT (since port cannot be congested)
454 *
455 * IMPORTANT: The fact that a read or write operation is indicated does NOT
456 * imply that the operation will succeed, merely that it should be performed
457 * and will not block.
441 */ 458 */
442 459
443static unsigned int poll(struct file *file, struct socket *sock, 460static unsigned int poll(struct file *file, struct socket *sock,
444 poll_table *wait) 461 poll_table *wait)
445{ 462{
446 struct sock *sk = sock->sk; 463 struct sock *sk = sock->sk;
447 u32 mask; 464 u32 mask = 0;
448 465
449 poll_wait(file, sk_sleep(sk), wait); 466 poll_wait(file, sk_sleep(sk), wait);
450 467
451 if (!skb_queue_empty(&sk->sk_receive_queue) || 468 switch ((int)sock->state) {
452 (sock->state == SS_UNCONNECTED) || 469 case SS_READY:
453 (sock->state == SS_DISCONNECTING)) 470 case SS_CONNECTED:
454 mask = (POLLRDNORM | POLLIN); 471 if (!tipc_sk_port(sk)->congested)
455 else 472 mask |= POLLOUT;
456 mask = 0; 473 /* fall thru' */
457 474 case SS_CONNECTING:
458 if (sock->state == SS_DISCONNECTING) 475 case SS_LISTENING:
459 mask |= POLLHUP; 476 if (!skb_queue_empty(&sk->sk_receive_queue))
460 else 477 mask |= (POLLIN | POLLRDNORM);
461 mask |= POLLOUT; 478 break;
479 case SS_DISCONNECTING:
480 mask = (POLLIN | POLLRDNORM | POLLHUP);
481 break;
482 }
462 483
463 return mask; 484 return mask;
464} 485}
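
The new poll() derives its mask from the socket state table documented above instead of ad-hoc queue checks. A user-space model of that mask logic (state names kept, the congestion and receive-queue tests reduced to boolean inputs):

#include <poll.h>
#include <stdio.h>

enum state { SS_UNCONNECTED, SS_CONNECTING, SS_CONNECTED,
	     SS_DISCONNECTING, SS_LISTENING, SS_READY };

static unsigned int tipc_poll_mask(enum state s, int rx_pending,
				   int congested)
{
	unsigned int mask = 0;

	switch (s) {
	case SS_READY:
	case SS_CONNECTED:
		if (!congested)
			mask |= POLLOUT;
		/* fall through: read flags as for the states below */
	case SS_CONNECTING:
	case SS_LISTENING:
		if (rx_pending)
			mask |= POLLIN | POLLRDNORM;
		break;
	case SS_DISCONNECTING:
		mask = POLLIN | POLLRDNORM | POLLHUP;
		break;
	default:	/* SS_UNCONNECTED: no flags */
		break;
	}
	return mask;
}

int main(void)
{
	printf("connected, uncongested, empty rx queue: 0x%x\n",
	       tipc_poll_mask(SS_CONNECTED, 0, 0));	/* POLLOUT only */
	printf("disconnecting:                          0x%x\n",
	       tipc_poll_mask(SS_DISCONNECTING, 1, 0));
	return 0;
}
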
@@ -1026,9 +1047,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1026 struct sk_buff *buf; 1047 struct sk_buff *buf;
1027 struct tipc_msg *msg; 1048 struct tipc_msg *msg;
1028 unsigned int sz; 1049 unsigned int sz;
1029 int sz_to_copy; 1050 int sz_to_copy, target, needed;
1030 int sz_copied = 0; 1051 int sz_copied = 0;
1031 int needed;
1032 char __user *crs = m->msg_iov->iov_base; 1052 char __user *crs = m->msg_iov->iov_base;
1033 unsigned char *buf_crs; 1053 unsigned char *buf_crs;
1034 u32 err; 1054 u32 err;
@@ -1050,6 +1070,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1050 goto exit; 1070 goto exit;
1051 } 1071 }
1052 1072
1073 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1074
1053restart: 1075restart:
1054 1076
1055 /* Look for a message in receive queue; wait if necessary */ 1077 /* Look for a message in receive queue; wait if necessary */
@@ -1138,7 +1160,7 @@ restart:
1138 1160
1139 if ((sz_copied < buf_len) && /* didn't get all requested data */ 1161 if ((sz_copied < buf_len) && /* didn't get all requested data */
1140 (!skb_queue_empty(&sk->sk_receive_queue) || 1162 (!skb_queue_empty(&sk->sk_receive_queue) ||
1141 (flags & MSG_WAITALL)) && /* and more is ready or required */ 1163 (sz_copied < target)) && /* and more is ready or required */
1142 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ 1164 (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */
1143 (!err)) /* and haven't reached a FIN */ 1165 (!err)) /* and haven't reached a FIN */
1144 goto restart; 1166 goto restart;
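
The recv_stream() change makes the loop condition honor a byte target from sock_rcvlowat(): MSG_WAITALL keeps reading toward the full request, while SO_RCVLOWAT sets a smaller low-water mark. A sketch of the resulting decision (sock_rcvlowat() is modeled here, not the kernel helper):

#include <stdio.h>

/* Modeled sock_rcvlowat(): full length under MSG_WAITALL, otherwise
 * the low-water mark clamped to the request size. */
static int rcv_target(int waitall, int rcvlowat, int buf_len)
{
	int t = waitall ? buf_len : rcvlowat;

	return t < buf_len ? t : buf_len;
}

/* Mirror of the post-patch "goto restart" condition. */
static int keep_reading(int sz_copied, int buf_len, int target,
			int queue_nonempty, int peeking, int err)
{
	return (sz_copied < buf_len) &&
	       (queue_nonempty || sz_copied < target) &&
	       !peeking && !err;
}

int main(void)
{
	int target = rcv_target(0, 100, 4096);	/* SO_RCVLOWAT = 100 */

	/* 60 bytes copied, queue drained: below target, block for more */
	printf("continue = %d\n", keep_reading(60, 4096, target, 0, 0, 0));
	/* 150 bytes copied, queue drained: low-water mark met, return */
	printf("continue = %d\n", keep_reading(150, 4096, target, 0, 0, 0));
	return 0;
}
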
@@ -1174,7 +1196,7 @@ static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1174 if (msg_connected(msg)) 1196 if (msg_connected(msg))
1175 threshold *= 4; 1197 threshold *= 4;
1176 1198
1177 return (queue_size >= threshold); 1199 return queue_size >= threshold;
1178} 1200}
1179 1201
1180/** 1202/**
@@ -1365,6 +1387,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1365 struct msghdr m = {NULL,}; 1387 struct msghdr m = {NULL,};
1366 struct sk_buff *buf; 1388 struct sk_buff *buf;
1367 struct tipc_msg *msg; 1389 struct tipc_msg *msg;
1390 long timeout;
1368 int res; 1391 int res;
1369 1392
1370 lock_sock(sk); 1393 lock_sock(sk);
@@ -1379,7 +1402,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1379 /* For now, TIPC does not support the non-blocking form of connect() */ 1402 /* For now, TIPC does not support the non-blocking form of connect() */
1380 1403
1381 if (flags & O_NONBLOCK) { 1404 if (flags & O_NONBLOCK) {
1382 res = -EWOULDBLOCK; 1405 res = -EOPNOTSUPP;
1383 goto exit; 1406 goto exit;
1384 } 1407 }
1385 1408
@@ -1425,11 +1448,12 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1425 1448
1426 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ 1449 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1427 1450
1451 timeout = tipc_sk(sk)->conn_timeout;
1428 release_sock(sk); 1452 release_sock(sk);
1429 res = wait_event_interruptible_timeout(*sk_sleep(sk), 1453 res = wait_event_interruptible_timeout(*sk_sleep(sk),
1430 (!skb_queue_empty(&sk->sk_receive_queue) || 1454 (!skb_queue_empty(&sk->sk_receive_queue) ||
1431 (sock->state != SS_CONNECTING)), 1455 (sock->state != SS_CONNECTING)),
1432 sk->sk_rcvtimeo); 1456 timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
1433 lock_sock(sk); 1457 lock_sock(sk);
1434 1458
1435 if (res > 0) { 1459 if (res > 0) {
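
Storing the connect timeout in tipc_sock instead of sk_rcvtimeo also changes the meaning of zero: the wait now falls back to MAX_SCHEDULE_TIMEOUT, i.e. block indefinitely. A minimal sketch of that selection (the kernel constant is modeled with LONG_MAX):

#include <limits.h>
#include <stdio.h>

#define MAX_SCHEDULE_TIMEOUT LONG_MAX

static long connect_wait_jiffies(long conn_timeout)
{
	return conn_timeout ? conn_timeout : MAX_SCHEDULE_TIMEOUT;
}

int main(void)
{
	printf("timeout 8000 -> wait %ld\n", connect_wait_jiffies(8000));
	printf("timeout 0    -> wait %ld (forever)\n",
	       connect_wait_jiffies(0));
	return 0;
}
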
@@ -1692,7 +1716,7 @@ static int setsockopt(struct socket *sock,
1692 res = tipc_set_portunreturnable(tport->ref, value); 1716 res = tipc_set_portunreturnable(tport->ref, value);
1693 break; 1717 break;
1694 case TIPC_CONN_TIMEOUT: 1718 case TIPC_CONN_TIMEOUT:
1695 sk->sk_rcvtimeo = msecs_to_jiffies(value); 1719 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
1696 /* no need to set "res", since already 0 at this point */ 1720 /* no need to set "res", since already 0 at this point */
1697 break; 1721 break;
1698 default: 1722 default:
@@ -1747,7 +1771,7 @@ static int getsockopt(struct socket *sock,
1747 res = tipc_portunreturnable(tport->ref, &value); 1771 res = tipc_portunreturnable(tport->ref, &value);
1748 break; 1772 break;
1749 case TIPC_CONN_TIMEOUT: 1773 case TIPC_CONN_TIMEOUT:
1750 value = jiffies_to_msecs(sk->sk_rcvtimeo); 1774 value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
1751 /* no need to set "res", since already 0 at this point */ 1775 /* no need to set "res", since already 0 at this point */
1752 break; 1776 break;
1753 case TIPC_NODE_RECVQ_DEPTH: 1777 case TIPC_NODE_RECVQ_DEPTH:
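
[note] Moving the value out of sk->sk_rcvtimeo into a dedicated tipc_sk(sk)->conn_timeout field stops TIPC_CONN_TIMEOUT and SO_RCVTIMEO from clobbering each other; previously both landed in the same field. One caveat survives the change: the value is stored in jiffies, and msecs_to_jiffies() rounds up to the next tick, so getsockopt() may report slightly more than was set. Userspace sketch, assuming the SOL_TIPC and TIPC_CONN_TIMEOUT definitions from linux/tipc.h:

    #include <sys/socket.h>
    #include <linux/tipc.h>

    /* Set a 5000 ms connect timeout; reading it back may show a value
     * rounded up to the kernel's HZ-dependent tick granularity. */
    static int set_conn_timeout_ms(int fd, unsigned int ms)
    {
            return setsockopt(fd, SOL_TIPC, TIPC_CONN_TIMEOUT,
                              &ms, sizeof(ms));
    }
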
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ab6eab4c45e2..33313961d010 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -76,6 +76,19 @@ struct top_srv {
76static struct top_srv topsrv = { 0 }; 76static struct top_srv topsrv = { 0 };
77 77
78/** 78/**
79 * htohl - convert value to endianness used by destination
80 * @in: value to convert
81 * @swap: non-zero if endianness must be reversed
82 *
83 * Returns converted value
84 */
85
86static u32 htohl(u32 in, int swap)
87{
88 return swap ? swab32(in) : in;
89}
90
91/**
79 * subscr_send_event - send a message containing a tipc_event to the subscriber 92 * subscr_send_event - send a message containing a tipc_event to the subscriber
80 * 93 *
81 * Note: Must not hold subscriber's server port lock, since tipc_send() will 94 * Note: Must not hold subscriber's server port lock, since tipc_send() will
@@ -94,11 +107,11 @@ static void subscr_send_event(struct subscription *sub,
94 msg_sect.iov_base = (void *)&sub->evt; 107 msg_sect.iov_base = (void *)&sub->evt;
95 msg_sect.iov_len = sizeof(struct tipc_event); 108 msg_sect.iov_len = sizeof(struct tipc_event);
96 109
97 sub->evt.event = htonl(event); 110 sub->evt.event = htohl(event, sub->swap);
98 sub->evt.found_lower = htonl(found_lower); 111 sub->evt.found_lower = htohl(found_lower, sub->swap);
99 sub->evt.found_upper = htonl(found_upper); 112 sub->evt.found_upper = htohl(found_upper, sub->swap);
100 sub->evt.port.ref = htonl(port_ref); 113 sub->evt.port.ref = htohl(port_ref, sub->swap);
101 sub->evt.port.node = htonl(node); 114 sub->evt.port.node = htohl(node, sub->swap);
102 tipc_send(sub->server_ref, 1, &msg_sect); 115 tipc_send(sub->server_ref, 1, &msg_sect);
103} 116}
104 117
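
[note] htohl() ("host to host long") replaces the unconditional htonl()/ntohl() pairs in this file: subscription events are now returned in the subscriber's native byte order, recorded per subscription in sub->swap, rather than forced to network order. A userspace mirror of the helper, which is its own inverse:

    #include <stdint.h>
    #include <byteswap.h>

    /* Identity for a same-endian peer, byte swap otherwise; symmetric,
     * so the same helper both decodes requests and encodes events. */
    static uint32_t htohl(uint32_t in, int swap)
    {
            return swap ? bswap_32(in) : in;
    }
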
@@ -274,29 +287,16 @@ static void subscr_cancel(struct tipc_subscr *s,
274{ 287{
275 struct subscription *sub; 288 struct subscription *sub;
276 struct subscription *sub_temp; 289 struct subscription *sub_temp;
277 __u32 type, lower, upper, timeout, filter;
278 int found = 0; 290 int found = 0;
279 291
280 /* Find first matching subscription, exit if not found */ 292 /* Find first matching subscription, exit if not found */
281 293
282 type = ntohl(s->seq.type);
283 lower = ntohl(s->seq.lower);
284 upper = ntohl(s->seq.upper);
285 timeout = ntohl(s->timeout);
286 filter = ntohl(s->filter) & ~TIPC_SUB_CANCEL;
287
288 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, 294 list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
289 subscription_list) { 295 subscription_list) {
290 if ((type == sub->seq.type) && 296 if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
291 (lower == sub->seq.lower) && 297 found = 1;
292 (upper == sub->seq.upper) && 298 break;
293 (timeout == sub->timeout) && 299 }
294 (filter == sub->filter) &&
295 !memcmp(s->usr_handle,sub->evt.s.usr_handle,
296 sizeof(s->usr_handle)) ){
297 found = 1;
298 break;
299 }
300 } 300 }
301 if (!found) 301 if (!found)
302 return; 302 return;
@@ -310,7 +310,7 @@ static void subscr_cancel(struct tipc_subscr *s,
310 k_term_timer(&sub->timer); 310 k_term_timer(&sub->timer);
311 spin_lock_bh(subscriber->lock); 311 spin_lock_bh(subscriber->lock);
312 } 312 }
313 dbg("Cancel: removing sub %u,%u,%u from subscriber %p list\n", 313 dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n",
314 sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber); 314 sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
315 subscr_del(sub); 315 subscr_del(sub);
316} 316}
@@ -325,10 +325,16 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
325 struct subscriber *subscriber) 325 struct subscriber *subscriber)
326{ 326{
327 struct subscription *sub; 327 struct subscription *sub;
328 int swap;
329
330 /* Determine subscriber's endianness */
331
332 swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
328 333
329 /* Detect & process a subscription cancellation request */ 334 /* Detect & process a subscription cancellation request */
330 335
331 if (ntohl(s->filter) & TIPC_SUB_CANCEL) { 336 if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
337 s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
332 subscr_cancel(s, subscriber); 338 subscr_cancel(s, subscriber);
333 return NULL; 339 return NULL;
334 } 340 }
@@ -353,12 +359,13 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
353 359
354 /* Initialize subscription object */ 360 /* Initialize subscription object */
355 361
356 sub->seq.type = ntohl(s->seq.type); 362 sub->seq.type = htohl(s->seq.type, swap);
357 sub->seq.lower = ntohl(s->seq.lower); 363 sub->seq.lower = htohl(s->seq.lower, swap);
358 sub->seq.upper = ntohl(s->seq.upper); 364 sub->seq.upper = htohl(s->seq.upper, swap);
359 sub->timeout = ntohl(s->timeout); 365 sub->timeout = htohl(s->timeout, swap);
360 sub->filter = ntohl(s->filter); 366 sub->filter = htohl(s->filter, swap);
361 if ((sub->filter && (sub->filter != TIPC_SUB_PORTS)) || 367 if ((!(sub->filter & TIPC_SUB_PORTS) ==
368 !(sub->filter & TIPC_SUB_SERVICE)) ||
362 (sub->seq.lower > sub->seq.upper)) { 369 (sub->seq.lower > sub->seq.upper)) {
363 warn("Subscription rejected, illegal request\n"); 370 warn("Subscription rejected, illegal request\n");
364 kfree(sub); 371 kfree(sub);
@@ -369,6 +376,7 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
369 INIT_LIST_HEAD(&sub->nameseq_list); 376 INIT_LIST_HEAD(&sub->nameseq_list);
370 list_add(&sub->subscription_list, &subscriber->subscription_list); 377 list_add(&sub->subscription_list, &subscriber->subscription_list);
371 sub->server_ref = subscriber->port_ref; 378 sub->server_ref = subscriber->port_ref;
379 sub->swap = swap;
372 memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); 380 memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
373 atomic_inc(&topsrv.subscription_count); 381 atomic_inc(&topsrv.subscription_count);
374 if (sub->timeout != TIPC_WAIT_FOREVER) { 382 if (sub->timeout != TIPC_WAIT_FOREVER) {
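
[note] The detection heuristic added above relies on an invariant of well-formed requests: exactly one of TIPC_SUB_PORTS or TIPC_SUB_SERVICE is set, in the subscriber's native byte order. If neither bit is visible locally, the request must be byte-swapped. After conversion, the rewritten sanity check (!(filter & TIPC_SUB_PORTS) == !(filter & TIPC_SUB_SERVICE)) rejects requests carrying both bits or neither. Storing the request verbatim in evt.s is also what lets subscr_cancel() earlier in this diff collapse to a single memcmp(), and why TIPC_SUB_CANCEL is stripped from the incoming copy before the lookup. The heuristic in isolation:

    #include <stdint.h>
    #include <linux/tipc.h>

    /* Valid requests set exactly one of these bits in the sender's native
     * byte order; seeing neither locally means an opposite-endian peer. */
    static int subscriber_is_swapped(uint32_t filter)
    {
            return !(filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
    }
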
@@ -598,12 +606,3 @@ void tipc_subscr_stop(void)
598 topsrv.user_ref = 0; 606 topsrv.user_ref = 0;
599 } 607 }
600} 608}
601
602
603int tipc_ispublished(struct tipc_name const *name)
604{
605 u32 domain = 0;
606
607 return(tipc_nametbl_translate(name->type, name->instance,&domain) != 0);
608}
609
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index c20f496d95b2..45d89bf4d202 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -53,6 +53,7 @@ typedef void (*tipc_subscr_event) (struct subscription *sub,
53 * @nameseq_list: adjacent subscriptions in name sequence's subscription list 53 * @nameseq_list: adjacent subscriptions in name sequence's subscription list
54 * @subscription_list: adjacent subscriptions in subscriber's subscription list 54 * @subscription_list: adjacent subscriptions in subscriber's subscription list
55 * @server_ref: object reference of server port associated with subscription 55 * @server_ref: object reference of server port associated with subscription
56 * @swap: indicates if subscriber uses opposite endianness in its messages
56 * @evt: template for events generated by subscription 57 * @evt: template for events generated by subscription
57 */ 58 */
58 59
@@ -65,6 +66,7 @@ struct subscription {
65 struct list_head nameseq_list; 66 struct list_head nameseq_list;
66 struct list_head subscription_list; 67 struct list_head subscription_list;
67 u32 server_ref; 68 u32 server_ref;
69 int swap;
68 struct tipc_event evt; 70 struct tipc_event evt;
69}; 71};
70 72
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 2c01ba2d86bf..83f8b5e91fc8 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -160,14 +160,3 @@ u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref)
160 } 160 }
161 return 0; 161 return 0;
162} 162}
163
164
165u32 tipc_zone_next_node(u32 addr)
166{
167 struct cluster *c_ptr = tipc_cltr_find(addr);
168
169 if (c_ptr)
170 return tipc_cltr_next_node(c_ptr, addr);
171 return 0;
172}
173
diff --git a/net/tipc/zone.h b/net/tipc/zone.h
index 7bdc3406ba9b..bd1c20ce9d06 100644
--- a/net/tipc/zone.h
+++ b/net/tipc/zone.h
@@ -61,7 +61,6 @@ void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest);
61struct _zone *tipc_zone_create(u32 addr); 61struct _zone *tipc_zone_create(u32 addr);
62void tipc_zone_delete(struct _zone *z_ptr); 62void tipc_zone_delete(struct _zone *z_ptr);
63void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr); 63void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr);
64u32 tipc_zone_next_node(u32 addr);
65 64
66static inline struct _zone *tipc_zone_find(u32 addr) 65static inline struct _zone *tipc_zone_find(u32 addr)
67{ 66{
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b39b2451ea5..3c95304a0817 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,7 +117,7 @@
117 117
118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; 118static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
119static DEFINE_SPINLOCK(unix_table_lock); 119static DEFINE_SPINLOCK(unix_table_lock);
120static atomic_t unix_nr_socks = ATOMIC_INIT(0); 120static atomic_long_t unix_nr_socks;
121 121
122#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) 122#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
123 123
@@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk)
360 if (u->addr) 360 if (u->addr)
361 unix_release_addr(u->addr); 361 unix_release_addr(u->addr);
362 362
363 atomic_dec(&unix_nr_socks); 363 atomic_long_dec(&unix_nr_socks);
364 local_bh_disable(); 364 local_bh_disable();
365 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 365 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
366 local_bh_enable(); 366 local_bh_enable();
367#ifdef UNIX_REFCNT_DEBUG 367#ifdef UNIX_REFCNT_DEBUG
368 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, 368 printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
369 atomic_read(&unix_nr_socks)); 369 atomic_long_read(&unix_nr_socks));
370#endif 370#endif
371} 371}
372 372
@@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
606 struct sock *sk = NULL; 606 struct sock *sk = NULL;
607 struct unix_sock *u; 607 struct unix_sock *u;
608 608
609 atomic_inc(&unix_nr_socks); 609 atomic_long_inc(&unix_nr_socks);
610 if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) 610 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
611 goto out; 611 goto out;
612 612
613 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); 613 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
@@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
632 unix_insert_socket(unix_sockets_unbound, sk); 632 unix_insert_socket(unix_sockets_unbound, sk);
633out: 633out:
634 if (sk == NULL) 634 if (sk == NULL)
635 atomic_dec(&unix_nr_socks); 635 atomic_long_dec(&unix_nr_socks);
636 else { 636 else {
637 local_bh_disable(); 637 local_bh_disable();
638 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 638 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
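
[note] Presumably the motivation for widening unix_nr_socks from atomic_t to atomic_long_t (an assumption, but consistent with these hunks): get_max_files() returns a long, so on 64-bit systems with files_stat.max_files raised very high the limit 2 * get_max_files() can exceed anything an int-sized counter can represent, making the guard unreliable. Counter, limit, and the debug printk (%ld) now share one width:

    atomic_long_inc(&unix_nr_socks);
    if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
            goto out;               /* the out: path undoes the increment */
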
@@ -1511,6 +1511,8 @@ restart:
1511 goto restart; 1511 goto restart;
1512 } 1512 }
1513 1513
1514 if (sock_flag(other, SOCK_RCVTSTAMP))
1515 __net_timestamp(skb);
1514 skb_queue_tail(&other->sk_receive_queue, skb); 1516 skb_queue_tail(&other->sk_receive_queue, skb);
1515 unix_state_unlock(other); 1517 unix_state_unlock(other);
1516 other->sk_data_ready(other, len); 1518 other->sk_data_ready(other, len);
@@ -1722,6 +1724,9 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1722 if (err) 1724 if (err)
1723 goto out_free; 1725 goto out_free;
1724 1726
1727 if (sock_flag(sk, SOCK_RCVTSTAMP))
1728 __sock_recv_timestamp(msg, sk, skb);
1729
1725 if (!siocb->scm) { 1730 if (!siocb->scm) {
1726 siocb->scm = &tmp_scm; 1731 siocb->scm = &tmp_scm;
1727 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1732 memset(&tmp_scm, 0, sizeof(tmp_scm));
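
[note] Taken together with the __net_timestamp() hunk above, this implements SO_TIMESTAMP for AF_UNIX datagrams: the skb is stamped when queued to the receiver, and __sock_recv_timestamp() turns that stamp into an SCM_TIMESTAMP control message at recvmsg() time. A userspace sketch of consuming it (enable first with setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on))):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/time.h>

    static int print_rx_timestamp(int fd, void *buf, size_t len)
    {
            char cbuf[CMSG_SPACE(sizeof(struct timeval))];
            struct iovec iov = { .iov_base = buf, .iov_len = len };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
            };
            struct cmsghdr *cm;
            ssize_t n = recvmsg(fd, &msg, 0);

            if (n < 0)
                    return -1;
            for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                    if (cm->cmsg_level == SOL_SOCKET &&
                        cm->cmsg_type == SCM_TIMESTAMP) {
                            struct timeval tv;

                            memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
                            printf("rx at %ld.%06ld\n",
                                   (long)tv.tv_sec, (long)tv.tv_usec);
                    }
            }
            return (int)n;
    }
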
@@ -2033,11 +2038,10 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
2033 if (sk->sk_shutdown == SHUTDOWN_MASK) 2038 if (sk->sk_shutdown == SHUTDOWN_MASK)
2034 mask |= POLLHUP; 2039 mask |= POLLHUP;
2035 if (sk->sk_shutdown & RCV_SHUTDOWN) 2040 if (sk->sk_shutdown & RCV_SHUTDOWN)
2036 mask |= POLLRDHUP; 2041 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2037 2042
2038 /* readable? */ 2043 /* readable? */
2039 if (!skb_queue_empty(&sk->sk_receive_queue) || 2044 if (!skb_queue_empty(&sk->sk_receive_queue))
2040 (sk->sk_shutdown & RCV_SHUTDOWN))
2041 mask |= POLLIN | POLLRDNORM; 2045 mask |= POLLIN | POLLRDNORM;
2042 2046
2043 /* Connection-based need to check for termination and startup */ 2047 /* Connection-based need to check for termination and startup */
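
[note] The poll change is behavioural, not just cosmetic: POLLIN | POLLRDNORM are now raised directly in the RCV_SHUTDOWN branch, so a poller on a half-closed socket wakes even with an empty receive queue and learns that recv() will return 0 without blocking. A reader that distinguishes the two wakeups (sketch; POLLRDHUP needs _GNU_SOURCE):

    #define _GNU_SOURCE             /* for POLLRDHUP */
    #include <poll.h>

    static void wait_readable(int fd)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDHUP };

            if (poll(&pfd, 1, -1) > 0) {
                    if (pfd.revents & POLLRDHUP) {
                            /* peer half-close: drain data, then recv() == 0 */
                    } else if (pfd.revents & POLLIN) {
                            /* ordinary data arrived */
                    }
            }
    }
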
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 2bf23406637a..74944a2dd436 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -471,7 +471,7 @@ static int wanrouter_device_setup(struct wan_device *wandev,
471 data = vmalloc(conf->data_size); 471 data = vmalloc(conf->data_size);
472 if (!data) { 472 if (!data) {
473 printk(KERN_INFO 473 printk(KERN_INFO
474 "%s: ERROR, Faild allocate kernel memory !\n", 474 "%s: ERROR, Failed allocate kernel memory !\n",
475 wandev->name); 475 wandev->name);
476 kfree(conf); 476 kfree(conf);
477 return -ENOBUFS; 477 return -ENOBUFS;
@@ -481,7 +481,7 @@ static int wanrouter_device_setup(struct wan_device *wandev,
481 err = wandev->setup(wandev, conf); 481 err = wandev->setup(wandev, conf);
482 } else { 482 } else {
483 printk(KERN_INFO 483 printk(KERN_INFO
484 "%s: ERROR, Faild to copy from user data !\n", 484 "%s: ERROR, Failed to copy from user data !\n",
485 wandev->name); 485 wandev->name);
486 err = -EFAULT; 486 err = -EFAULT;
487 } 487 }
diff --git a/net/wireless/core.c b/net/wireless/core.c
index d6d046b9f6f2..9c21ebf9780e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
253 WARN_ON(err); 253 WARN_ON(err);
254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL; 254 wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
255 } 255 }
256
257 return err;
256 } 258 }
257 259
258 wiphy_net_set(&rdev->wiphy, net); 260 wiphy_net_set(&rdev->wiphy, net);
259 261
260 return err; 262 err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev));
263 WARN_ON(err);
264
265 return 0;
261} 266}
262 267
263static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) 268static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
@@ -428,7 +433,7 @@ int wiphy_register(struct wiphy *wiphy)
428 433
429 /* sanity check ifmodes */ 434 /* sanity check ifmodes */
430 WARN_ON(!ifmodes); 435 WARN_ON(!ifmodes);
431 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; 436 ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1;
432 if (WARN_ON(ifmodes != wiphy->interface_modes)) 437 if (WARN_ON(ifmodes != wiphy->interface_modes))
433 wiphy->interface_modes = ifmodes; 438 wiphy->interface_modes = ifmodes;
434 439
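
[note] NUM_NL80211_IFTYPES is the stable name for the interface-type count (it aliases __NL80211_IFTYPE_AFTER_LAST, so the mask value is unchanged here; the NUM_ spelling simply tracks the newly added P2P types automatically). The trailing & ~1 clears bit 0, NL80211_IFTYPE_UNSPECIFIED, which is never a valid advertised mode. An equivalent, more explicit spelling of the mask:

    /* every real interface type, minus UNSPECIFIED (bit 0): */
    u16 valid = ((1 << NUM_NL80211_IFTYPES) - 1) &
                ~BIT(NL80211_IFTYPE_UNSPECIFIED);
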
@@ -683,8 +688,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
683 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); 688 INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work);
684 INIT_LIST_HEAD(&wdev->event_list); 689 INIT_LIST_HEAD(&wdev->event_list);
685 spin_lock_init(&wdev->event_lock); 690 spin_lock_init(&wdev->event_lock);
686 INIT_LIST_HEAD(&wdev->action_registrations); 691 INIT_LIST_HEAD(&wdev->mgmt_registrations);
687 spin_lock_init(&wdev->action_registrations_lock); 692 spin_lock_init(&wdev->mgmt_registrations_lock);
688 693
689 mutex_lock(&rdev->devlist_mtx); 694 mutex_lock(&rdev->devlist_mtx);
690 list_add_rcu(&wdev->list, &rdev->netdev_list); 695 list_add_rcu(&wdev->list, &rdev->netdev_list);
@@ -724,6 +729,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
724 dev->ethtool_ops = &cfg80211_ethtool_ops; 729 dev->ethtool_ops = &cfg80211_ethtool_ops;
725 730
726 if ((wdev->iftype == NL80211_IFTYPE_STATION || 731 if ((wdev->iftype == NL80211_IFTYPE_STATION ||
732 wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
727 wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) 733 wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
728 dev->priv_flags |= IFF_DONT_BRIDGE; 734 dev->priv_flags |= IFF_DONT_BRIDGE;
729 break; 735 break;
@@ -732,6 +738,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
732 case NL80211_IFTYPE_ADHOC: 738 case NL80211_IFTYPE_ADHOC:
733 cfg80211_leave_ibss(rdev, dev, true); 739 cfg80211_leave_ibss(rdev, dev, true);
734 break; 740 break;
741 case NL80211_IFTYPE_P2P_CLIENT:
735 case NL80211_IFTYPE_STATION: 742 case NL80211_IFTYPE_STATION:
736 wdev_lock(wdev); 743 wdev_lock(wdev);
737#ifdef CONFIG_CFG80211_WEXT 744#ifdef CONFIG_CFG80211_WEXT
@@ -804,7 +811,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
804 sysfs_remove_link(&dev->dev.kobj, "phy80211"); 811 sysfs_remove_link(&dev->dev.kobj, "phy80211");
805 list_del_rcu(&wdev->list); 812 list_del_rcu(&wdev->list);
806 rdev->devlist_generation++; 813 rdev->devlist_generation++;
807 cfg80211_mlme_purge_actions(wdev); 814 cfg80211_mlme_purge_registrations(wdev);
808#ifdef CONFIG_CFG80211_WEXT 815#ifdef CONFIG_CFG80211_WEXT
809 kfree(wdev->wext.keys); 816 kfree(wdev->wext.keys);
810#endif 817#endif
@@ -910,52 +917,3 @@ static void __exit cfg80211_exit(void)
910 destroy_workqueue(cfg80211_wq); 917 destroy_workqueue(cfg80211_wq);
911} 918}
912module_exit(cfg80211_exit); 919module_exit(cfg80211_exit);
913
914static int ___wiphy_printk(const char *level, const struct wiphy *wiphy,
915 struct va_format *vaf)
916{
917 if (!wiphy)
918 return printk("%s(NULL wiphy *): %pV", level, vaf);
919
920 return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf);
921}
922
923int __wiphy_printk(const char *level, const struct wiphy *wiphy,
924 const char *fmt, ...)
925{
926 struct va_format vaf;
927 va_list args;
928 int r;
929
930 va_start(args, fmt);
931
932 vaf.fmt = fmt;
933 vaf.va = &args;
934
935 r = ___wiphy_printk(level, wiphy, &vaf);
936 va_end(args);
937
938 return r;
939}
940EXPORT_SYMBOL(__wiphy_printk);
941
942#define define_wiphy_printk_level(func, kern_level) \
943int func(const struct wiphy *wiphy, const char *fmt, ...) \
944{ \
945 struct va_format vaf; \
946 va_list args; \
947 int r; \
948 \
949 va_start(args, fmt); \
950 \
951 vaf.fmt = fmt; \
952 vaf.va = &args; \
953 \
954 r = ___wiphy_printk(kern_level, wiphy, &vaf); \
955 va_end(args); \
956 \
957 return r; \
958} \
959EXPORT_SYMBOL(func);
960
961define_wiphy_printk_level(wiphy_debug, KERN_DEBUG);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 63d57ae399c3..6583cca0e2ee 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -86,7 +86,7 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
86static inline 86static inline
87bool wiphy_idx_valid(int wiphy_idx) 87bool wiphy_idx_valid(int wiphy_idx)
88{ 88{
89 return (wiphy_idx >= 0); 89 return wiphy_idx >= 0;
90} 90}
91 91
92 92
@@ -95,7 +95,10 @@ extern struct mutex cfg80211_mutex;
95extern struct list_head cfg80211_rdev_list; 95extern struct list_head cfg80211_rdev_list;
96extern int cfg80211_rdev_list_generation; 96extern int cfg80211_rdev_list_generation;
97 97
98#define assert_cfg80211_lock() WARN_ON(!mutex_is_locked(&cfg80211_mutex)) 98static inline void assert_cfg80211_lock(void)
99{
100 lockdep_assert_held(&cfg80211_mutex);
101}
99 102
100/* 103/*
101 * You can use this to mark a wiphy_idx as not having an associated wiphy. 104 * You can use this to mark a wiphy_idx as not having an associated wiphy.
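
[note] lockdep_assert_held() is both stricter and cheaper than the open-coded test it replaces: WARN_ON(!mutex_is_locked(&m)) passes whenever any task holds the mutex and costs a branch in every build, while the lockdep form fires unless the current task holds it and compiles away entirely when CONFIG_LOCKDEP is off. The same swap is applied to ASSERT_RDEV_LOCK/ASSERT_WDEV_LOCK below, which also sheds the stray trailing semicolons the old macro definitions carried. The contrast:

    WARN_ON(!mutex_is_locked(&m));  /* any holder satisfies this, every build */
    lockdep_assert_held(&m);        /* must be held by *this* task;
                                       no-op without CONFIG_LOCKDEP */
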
@@ -202,8 +205,8 @@ static inline void wdev_unlock(struct wireless_dev *wdev)
202 mutex_unlock(&wdev->mtx); 205 mutex_unlock(&wdev->mtx);
203} 206}
204 207
205#define ASSERT_RDEV_LOCK(rdev) WARN_ON(!mutex_is_locked(&(rdev)->mtx)); 208#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx)
206#define ASSERT_WDEV_LOCK(wdev) WARN_ON(!mutex_is_locked(&(wdev)->mtx)); 209#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
207 210
208enum cfg80211_event_type { 211enum cfg80211_event_type {
209 EVENT_CONNECT_RESULT, 212 EVENT_CONNECT_RESULT,
@@ -331,16 +334,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
331 const u8 *resp_ie, size_t resp_ie_len, 334 const u8 *resp_ie, size_t resp_ie_len,
332 u16 status, bool wextev, 335 u16 status, bool wextev,
333 struct cfg80211_bss *bss); 336 struct cfg80211_bss *bss);
334int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 337int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
335 const u8 *match_data, int match_len); 338 u16 frame_type, const u8 *match_data,
336void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); 339 int match_len);
337void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); 340void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
338int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 341void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
339 struct net_device *dev, 342int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
340 struct ieee80211_channel *chan, 343 struct net_device *dev,
341 enum nl80211_channel_type channel_type, 344 struct ieee80211_channel *chan,
342 bool channel_type_valid, 345 enum nl80211_channel_type channel_type,
343 const u8 *buf, size_t len, u64 *cookie); 346 bool channel_type_valid,
347 const u8 *buf, size_t len, u64 *cookie);
344 348
345/* SME */ 349/* SME */
346int __cfg80211_connect(struct cfg80211_registered_device *rdev, 350int __cfg80211_connect(struct cfg80211_registered_device *rdev,
@@ -371,7 +375,7 @@ bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev);
371/* internal helpers */ 375/* internal helpers */
372int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, 376int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
373 struct key_params *params, int key_idx, 377 struct key_params *params, int key_idx,
374 const u8 *mac_addr); 378 bool pairwise, const u8 *mac_addr);
375void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, 379void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
376 size_t ie_len, u16 reason, bool from_ap); 380 size_t ie_len, u16 reason, bool from_ap);
377void cfg80211_sme_scan_done(struct net_device *dev); 381void cfg80211_sme_scan_done(struct net_device *dev);
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index a4991a3efec0..39765bcfb472 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -34,6 +34,7 @@ static ssize_t name## _read(struct file *file, char __user *userbuf, \
34static const struct file_operations name## _ops = { \ 34static const struct file_operations name## _ops = { \
35 .read = name## _read, \ 35 .read = name## _read, \
36 .open = cfg80211_open_file_generic, \ 36 .open = cfg80211_open_file_generic, \
37 .llseek = generic_file_llseek, \
37}; 38};
38 39
39DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", 40DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d",
@@ -102,6 +103,7 @@ static ssize_t ht40allow_map_read(struct file *file,
102static const struct file_operations ht40allow_map_ops = { 103static const struct file_operations ht40allow_map_ops = {
103 .read = ht40allow_map_read, 104 .read = ht40allow_map_read,
104 .open = cfg80211_open_file_generic, 105 .open = cfg80211_open_file_generic,
106 .llseek = default_llseek,
105}; 107};
106 108
107#define DEBUGFS_ADD(name) \ 109#define DEBUGFS_ADD(name) \
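
[note] These one-line additions look like part of the tree-wide sweep that made every file_operations spell out .llseek explicitly instead of inheriting an implicit default (an assumption from context; the debugfs files here are simple read-only buffers, for which generic_file_llseek and default_llseek are both safe). The pattern:

    static const struct file_operations example_ro_ops = { /* hypothetical */
            .read   = example_read,
            .open   = cfg80211_open_file_generic,
            .llseek = default_llseek,   /* explicit, no implicit fallback */
    };
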
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 27a8ce9343c3..f33fbb79437c 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -88,6 +88,25 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
88 if (wdev->ssid_len) 88 if (wdev->ssid_len)
89 return -EALREADY; 89 return -EALREADY;
90 90
91 if (!params->basic_rates) {
92 /*
93 * If no rates were explicitly configured,
94 * use the mandatory rate set for 11b or
95 * 11a for maximum compatibility.
96 */
97 struct ieee80211_supported_band *sband =
98 rdev->wiphy.bands[params->channel->band];
99 int j;
100 u32 flag = params->channel->band == IEEE80211_BAND_5GHZ ?
101 IEEE80211_RATE_MANDATORY_A :
102 IEEE80211_RATE_MANDATORY_B;
103
104 for (j = 0; j < sband->n_bitrates; j++) {
105 if (sband->bitrates[j].flags & flag)
106 params->basic_rates |= BIT(j);
107 }
108 }
109
91 if (WARN_ON(wdev->connect_keys)) 110 if (WARN_ON(wdev->connect_keys))
92 kfree(wdev->connect_keys); 111 kfree(wdev->connect_keys);
93 wdev->connect_keys = connkeys; 112 wdev->connect_keys = connkeys;
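
[note] Without this fallback, an IBSS join with no configured rates would carry an empty basic-rate set. The fallback picks the band's mandatory rates, which every station on that band must support: the 11b set (1, 2, 5.5 and 11 Mb/s) on 2.4 GHz and the 11a set (6, 12 and 24 Mb/s) on 5 GHz. Note that basic_rates is a bitmap over sband->bitrates[], not a rate value:

    /* bit j of basic_rates selects sband->bitrates[j]: */
    if (sband->bitrates[j].flags & mandatory_flag)
            params->basic_rates |= BIT(j);
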
@@ -141,7 +160,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
141 */ 160 */
142 if (rdev->ops->del_key) 161 if (rdev->ops->del_key)
143 for (i = 0; i < 6; i++) 162 for (i = 0; i < 6; i++)
144 rdev->ops->del_key(wdev->wiphy, dev, i, NULL); 163 rdev->ops->del_key(wdev->wiphy, dev, i, false, NULL);
145 164
146 if (wdev->current_bss) { 165 if (wdev->current_bss) {
147 cfg80211_unhold_bss(wdev->current_bss); 166 cfg80211_unhold_bss(wdev->current_bss);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d1a3fb99fdf2..26838d903b9a 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -149,7 +149,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; 149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
150 const u8 *bssid = mgmt->bssid; 150 const u8 *bssid = mgmt->bssid;
151 int i; 151 int i;
152 bool found = false; 152 bool found = false, was_current = false;
153 153
154 ASSERT_WDEV_LOCK(wdev); 154 ASSERT_WDEV_LOCK(wdev);
155 155
@@ -159,6 +159,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
159 cfg80211_put_bss(&wdev->current_bss->pub); 159 cfg80211_put_bss(&wdev->current_bss->pub);
160 wdev->current_bss = NULL; 160 wdev->current_bss = NULL;
161 found = true; 161 found = true;
162 was_current = true;
162 } else for (i = 0; i < MAX_AUTH_BSSES; i++) { 163 } else for (i = 0; i < MAX_AUTH_BSSES; i++) {
163 if (wdev->auth_bsses[i] && 164 if (wdev->auth_bsses[i] &&
164 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { 165 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) {
@@ -183,7 +184,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
183 184
184 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); 185 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL);
185 186
186 if (wdev->sme_state == CFG80211_SME_CONNECTED) { 187 if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) {
187 u16 reason_code; 188 u16 reason_code;
188 bool from_ap; 189 bool from_ap;
189 190
@@ -747,31 +748,53 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
747} 748}
748EXPORT_SYMBOL(cfg80211_new_sta); 749EXPORT_SYMBOL(cfg80211_new_sta);
749 750
750struct cfg80211_action_registration { 751struct cfg80211_mgmt_registration {
751 struct list_head list; 752 struct list_head list;
752 753
753 u32 nlpid; 754 u32 nlpid;
754 755
755 int match_len; 756 int match_len;
756 757
758 __le16 frame_type;
759
757 u8 match[]; 760 u8 match[];
758}; 761};
759 762
760int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 763int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
761 const u8 *match_data, int match_len) 764 u16 frame_type, const u8 *match_data,
765 int match_len)
762{ 766{
763 struct cfg80211_action_registration *reg, *nreg; 767 struct wiphy *wiphy = wdev->wiphy;
768 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
769 struct cfg80211_mgmt_registration *reg, *nreg;
764 int err = 0; 770 int err = 0;
771 u16 mgmt_type;
772
773 if (!wdev->wiphy->mgmt_stypes)
774 return -EOPNOTSUPP;
775
776 if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT)
777 return -EINVAL;
778
779 if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE))
780 return -EINVAL;
781
782 mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4;
783 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type)))
784 return -EINVAL;
765 785
766 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); 786 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL);
767 if (!nreg) 787 if (!nreg)
768 return -ENOMEM; 788 return -ENOMEM;
769 789
770 spin_lock_bh(&wdev->action_registrations_lock); 790 spin_lock_bh(&wdev->mgmt_registrations_lock);
771 791
772 list_for_each_entry(reg, &wdev->action_registrations, list) { 792 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
773 int mlen = min(match_len, reg->match_len); 793 int mlen = min(match_len, reg->match_len);
774 794
795 if (frame_type != le16_to_cpu(reg->frame_type))
796 continue;
797
775 if (memcmp(reg->match, match_data, mlen) == 0) { 798 if (memcmp(reg->match, match_data, mlen) == 0) {
776 err = -EALREADY; 799 err = -EALREADY;
777 break; 800 break;
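
[note] A registration now names a full management type/subtype instead of implying "Action frame": frame_type is a 16-bit frame-control value with only the FTYPE/STYPE fields populated, must decode to a management frame with no stray bits, and its subtype must appear in the per-iftype rx bitmap the driver advertises via wiphy->mgmt_stypes. The subtype doubles as a bit index once the four FTYPE bits are shifted out. For example (constants from linux/ieee80211.h):

    /* register for Probe Request frames: */
    u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ;
    u16 idx = (frame_type & IEEE80211_FCTL_STYPE) >> 4;  /* subtype index */
    bool ok = wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(idx);
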
@@ -786,140 +809,212 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
786 memcpy(nreg->match, match_data, match_len); 809 memcpy(nreg->match, match_data, match_len);
787 nreg->match_len = match_len; 810 nreg->match_len = match_len;
788 nreg->nlpid = snd_pid; 811 nreg->nlpid = snd_pid;
789 list_add(&nreg->list, &wdev->action_registrations); 812 nreg->frame_type = cpu_to_le16(frame_type);
813 list_add(&nreg->list, &wdev->mgmt_registrations);
814
815 if (rdev->ops->mgmt_frame_register)
816 rdev->ops->mgmt_frame_register(wiphy, wdev->netdev,
817 frame_type, true);
790 818
791 out: 819 out:
792 spin_unlock_bh(&wdev->action_registrations_lock); 820 spin_unlock_bh(&wdev->mgmt_registrations_lock);
821
793 return err; 822 return err;
794} 823}
795 824
796void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) 825void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
797{ 826{
798 struct cfg80211_action_registration *reg, *tmp; 827 struct wiphy *wiphy = wdev->wiphy;
828 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
829 struct cfg80211_mgmt_registration *reg, *tmp;
799 830
800 spin_lock_bh(&wdev->action_registrations_lock); 831 spin_lock_bh(&wdev->mgmt_registrations_lock);
801 832
802 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 833 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
803 if (reg->nlpid == nlpid) { 834 if (reg->nlpid != nlpid)
804 list_del(&reg->list); 835 continue;
805 kfree(reg); 836
837 if (rdev->ops->mgmt_frame_register) {
838 u16 frame_type = le16_to_cpu(reg->frame_type);
839
840 rdev->ops->mgmt_frame_register(wiphy, wdev->netdev,
841 frame_type, false);
806 } 842 }
843
844 list_del(&reg->list);
845 kfree(reg);
807 } 846 }
808 847
809 spin_unlock_bh(&wdev->action_registrations_lock); 848 spin_unlock_bh(&wdev->mgmt_registrations_lock);
810} 849}
811 850
812void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) 851void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
813{ 852{
814 struct cfg80211_action_registration *reg, *tmp; 853 struct cfg80211_mgmt_registration *reg, *tmp;
815 854
816 spin_lock_bh(&wdev->action_registrations_lock); 855 spin_lock_bh(&wdev->mgmt_registrations_lock);
817 856
818 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 857 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
819 list_del(&reg->list); 858 list_del(&reg->list);
820 kfree(reg); 859 kfree(reg);
821 } 860 }
822 861
823 spin_unlock_bh(&wdev->action_registrations_lock); 862 spin_unlock_bh(&wdev->mgmt_registrations_lock);
824} 863}
825 864
826int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 865int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
827 struct net_device *dev, 866 struct net_device *dev,
828 struct ieee80211_channel *chan, 867 struct ieee80211_channel *chan,
829 enum nl80211_channel_type channel_type, 868 enum nl80211_channel_type channel_type,
830 bool channel_type_valid, 869 bool channel_type_valid,
831 const u8 *buf, size_t len, u64 *cookie) 870 const u8 *buf, size_t len, u64 *cookie)
832{ 871{
833 struct wireless_dev *wdev = dev->ieee80211_ptr; 872 struct wireless_dev *wdev = dev->ieee80211_ptr;
834 const struct ieee80211_mgmt *mgmt; 873 const struct ieee80211_mgmt *mgmt;
874 u16 stype;
835 875
836 if (rdev->ops->action == NULL) 876 if (!wdev->wiphy->mgmt_stypes)
837 return -EOPNOTSUPP; 877 return -EOPNOTSUPP;
878
879 if (!rdev->ops->mgmt_tx)
880 return -EOPNOTSUPP;
881
838 if (len < 24 + 1) 882 if (len < 24 + 1)
839 return -EINVAL; 883 return -EINVAL;
840 884
841 mgmt = (const struct ieee80211_mgmt *) buf; 885 mgmt = (const struct ieee80211_mgmt *) buf;
842 if (!ieee80211_is_action(mgmt->frame_control)) 886
887 if (!ieee80211_is_mgmt(mgmt->frame_control))
843 return -EINVAL; 888 return -EINVAL;
844 if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { 889
845 /* Verify that we are associated with the destination AP */ 890 stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
891 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4)))
892 return -EINVAL;
893
894 if (ieee80211_is_action(mgmt->frame_control) &&
895 mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) {
896 int err = 0;
897
846 wdev_lock(wdev); 898 wdev_lock(wdev);
847 899
848 if (!wdev->current_bss || 900 switch (wdev->iftype) {
849 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, 901 case NL80211_IFTYPE_ADHOC:
850 ETH_ALEN) != 0 || 902 case NL80211_IFTYPE_STATION:
851 (wdev->iftype == NL80211_IFTYPE_STATION && 903 case NL80211_IFTYPE_P2P_CLIENT:
852 memcmp(wdev->current_bss->pub.bssid, mgmt->da, 904 if (!wdev->current_bss) {
853 ETH_ALEN) != 0)) { 905 err = -ENOTCONN;
854 wdev_unlock(wdev); 906 break;
855 return -ENOTCONN; 907 }
856 } 908
909 if (memcmp(wdev->current_bss->pub.bssid,
910 mgmt->bssid, ETH_ALEN)) {
911 err = -ENOTCONN;
912 break;
913 }
914
915 /*
916 * check for IBSS DA must be done by driver as
917 * cfg80211 doesn't track the stations
918 */
919 if (wdev->iftype == NL80211_IFTYPE_ADHOC)
920 break;
857 921
922 /* for station, check that DA is the AP */
923 if (memcmp(wdev->current_bss->pub.bssid,
924 mgmt->da, ETH_ALEN)) {
925 err = -ENOTCONN;
926 break;
927 }
928 break;
929 case NL80211_IFTYPE_AP:
930 case NL80211_IFTYPE_P2P_GO:
931 case NL80211_IFTYPE_AP_VLAN:
932 if (memcmp(mgmt->bssid, dev->dev_addr, ETH_ALEN))
933 err = -EINVAL;
934 break;
935 default:
936 err = -EOPNOTSUPP;
937 break;
938 }
858 wdev_unlock(wdev); 939 wdev_unlock(wdev);
940
941 if (err)
942 return err;
859 } 943 }
860 944
861 if (memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0) 945 if (memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0)
862 return -EINVAL; 946 return -EINVAL;
863 947
864 /* Transmit the Action frame as requested by user space */ 948 /* Transmit the Action frame as requested by user space */
865 return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, 949 return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type,
866 channel_type_valid, buf, len, cookie); 950 channel_type_valid, buf, len, cookie);
867} 951}
868 952
869bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, 953bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
870 size_t len, gfp_t gfp) 954 size_t len, gfp_t gfp)
871{ 955{
872 struct wireless_dev *wdev = dev->ieee80211_ptr; 956 struct wireless_dev *wdev = dev->ieee80211_ptr;
873 struct wiphy *wiphy = wdev->wiphy; 957 struct wiphy *wiphy = wdev->wiphy;
874 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 958 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
875 struct cfg80211_action_registration *reg; 959 struct cfg80211_mgmt_registration *reg;
876 const u8 *action_data; 960 const struct ieee80211_txrx_stypes *stypes =
877 int action_data_len; 961 &wiphy->mgmt_stypes[wdev->iftype];
962 struct ieee80211_mgmt *mgmt = (void *)buf;
963 const u8 *data;
964 int data_len;
878 bool result = false; 965 bool result = false;
966 __le16 ftype = mgmt->frame_control &
967 cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
968 u16 stype;
879 969
880 /* frame length - min size excluding category */ 970 stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
881 action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1);
882 971
883 /* action data starts with category */ 972 if (!(stypes->rx & BIT(stype)))
884 action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; 973 return false;
885 974
886 spin_lock_bh(&wdev->action_registrations_lock); 975 data = buf + ieee80211_hdrlen(mgmt->frame_control);
976 data_len = len - ieee80211_hdrlen(mgmt->frame_control);
977
978 spin_lock_bh(&wdev->mgmt_registrations_lock);
979
980 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
981 if (reg->frame_type != ftype)
982 continue;
887 983
888 list_for_each_entry(reg, &wdev->action_registrations, list) { 984 if (reg->match_len > data_len)
889 if (reg->match_len > action_data_len)
890 continue; 985 continue;
891 986
892 if (memcmp(reg->match, action_data, reg->match_len)) 987 if (memcmp(reg->match, data, reg->match_len))
893 continue; 988 continue;
894 989
895 /* found match! */ 990 /* found match! */
896 991
897 /* Indicate the received Action frame to user space */ 992 /* Indicate the received Action frame to user space */
898 if (nl80211_send_action(rdev, dev, reg->nlpid, freq, 993 if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq,
899 buf, len, gfp)) 994 buf, len, gfp))
900 continue; 995 continue;
901 996
902 result = true; 997 result = true;
903 break; 998 break;
904 } 999 }
905 1000
906 spin_unlock_bh(&wdev->action_registrations_lock); 1001 spin_unlock_bh(&wdev->mgmt_registrations_lock);
907 1002
908 return result; 1003 return result;
909} 1004}
910EXPORT_SYMBOL(cfg80211_rx_action); 1005EXPORT_SYMBOL(cfg80211_rx_mgmt);
911 1006
912void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, 1007void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie,
913 const u8 *buf, size_t len, bool ack, gfp_t gfp) 1008 const u8 *buf, size_t len, bool ack, gfp_t gfp)
914{ 1009{
915 struct wireless_dev *wdev = dev->ieee80211_ptr; 1010 struct wireless_dev *wdev = dev->ieee80211_ptr;
916 struct wiphy *wiphy = wdev->wiphy; 1011 struct wiphy *wiphy = wdev->wiphy;
917 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 1012 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
918 1013
919 /* Indicate TX status of the Action frame to user space */ 1014 /* Indicate TX status of the Action frame to user space */
920 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); 1015 nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
921} 1016}
922EXPORT_SYMBOL(cfg80211_action_tx_status); 1017EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
923 1018
924void cfg80211_cqm_rssi_notify(struct net_device *dev, 1019void cfg80211_cqm_rssi_notify(struct net_device *dev,
925 enum nl80211_cqm_rssi_threshold_event rssi_event, 1020 enum nl80211_cqm_rssi_threshold_event rssi_event,
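
[note] On the receive side the match is two-staged: first on the type/subtype word, compared as __le16 against the value pre-swapped at registration time so the per-frame hot path does no byte swapping, then as a prefix match on the payload that follows the variable-length 802.11 header, located with ieee80211_hdrlen(). For a plain Action frame that payload starts at the category byte, so the old Action matching behaviour is preserved. The two keys:

    __le16 ftype = mgmt->frame_control &
                   cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
    const u8 *data = buf + ieee80211_hdrlen(mgmt->frame_control);
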
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 37902a54e9c1..4e78e3f26798 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -23,6 +23,11 @@
23#include "nl80211.h" 23#include "nl80211.h"
24#include "reg.h" 24#include "reg.h"
25 25
26static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
27 struct genl_info *info);
28static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb,
29 struct genl_info *info);
30
26/* the netlink family */ 31/* the netlink family */
27static struct genl_family nl80211_fam = { 32static struct genl_family nl80211_fam = {
28 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ 33 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
@@ -31,6 +36,8 @@ static struct genl_family nl80211_fam = {
31 .version = 1, /* no particular meaning now */ 36 .version = 1, /* no particular meaning now */
32 .maxattr = NL80211_ATTR_MAX, 37 .maxattr = NL80211_ATTR_MAX,
33 .netnsok = true, 38 .netnsok = true,
39 .pre_doit = nl80211_pre_doit,
40 .post_doit = nl80211_post_doit,
34}; 41};
35 42
36/* internal helper: get rdev and dev */ 43/* internal helper: get rdev and dev */
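
[note] pre_doit/post_doit are genetlink hooks that bracket every doit handler of the family; nl80211 uses them to hoist the rdev/netdev lookup and locking out of each operation and to pass the results along in info->user_ptr[0..1], which is what lets the handlers further down shrink so dramatically. A simplified sketch of such a hook (the real one also consults per-op flags to decide which lookups and locks each command needs):

    static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
                                struct genl_info *info)
    {
            struct cfg80211_registered_device *rdev;

            rtnl_lock();
            rdev = cfg80211_get_dev_from_info(info);    /* existing helper */
            if (IS_ERR(rdev)) {
                    rtnl_unlock();
                    return PTR_ERR(rdev);
            }
            info->user_ptr[0] = rdev;
            return 0;
    }
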
@@ -86,6 +93,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
86 [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, 93 [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
87 [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, 94 [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
88 [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, 95 [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 },
96 [NL80211_ATTR_KEY_TYPE] = { .type = NLA_U32 },
89 97
90 [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 }, 98 [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
91 [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 }, 99 [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
@@ -136,6 +144,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
136 .len = sizeof(struct nl80211_sta_flag_update), 144 .len = sizeof(struct nl80211_sta_flag_update),
137 }, 145 },
138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, 146 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
147 [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
148 [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, 149 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
140 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, 150 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
141 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, 151 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
@@ -156,9 +166,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
156 166
157 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, 167 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
158 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, 168 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
169 [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
159}; 170};
160 171
161/* policy for the attributes */ 172/* policy for the key attributes */
162static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { 173static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
163 [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, 174 [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN },
164 [NL80211_KEY_IDX] = { .type = NLA_U8 }, 175 [NL80211_KEY_IDX] = { .type = NLA_U8 },
@@ -166,6 +177,7 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
166 [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, 177 [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 },
167 [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, 178 [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
168 [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, 179 [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
180 [NL80211_KEY_TYPE] = { .type = NLA_U32 },
169}; 181};
170 182
171/* ifidx get helper */ 183/* ifidx get helper */
@@ -188,6 +200,47 @@ static int nl80211_get_ifidx(struct netlink_callback *cb)
188 return res; 200 return res;
189} 201}
190 202
203static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
204 struct netlink_callback *cb,
205 struct cfg80211_registered_device **rdev,
206 struct net_device **dev)
207{
208 int ifidx = cb->args[0];
209 int err;
210
211 if (!ifidx)
212 ifidx = nl80211_get_ifidx(cb);
213 if (ifidx < 0)
214 return ifidx;
215
216 cb->args[0] = ifidx;
217
218 rtnl_lock();
219
220 *dev = __dev_get_by_index(sock_net(skb->sk), ifidx);
221 if (!*dev) {
222 err = -ENODEV;
223 goto out_rtnl;
224 }
225
226 *rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
227 if (IS_ERR(*rdev)) {
228 err = PTR_ERR(*rdev);
229 goto out_rtnl;
230 }
231
232 return 0;
233 out_rtnl:
234 rtnl_unlock();
235 return err;
236}
237
238static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev)
239{
240 cfg80211_unlock_rdev(rdev);
241 rtnl_unlock();
242}
243
191/* IE validation */ 244/* IE validation */
192static bool is_valid_ie_attr(const struct nlattr *attr) 245static bool is_valid_ie_attr(const struct nlattr *attr)
193{ 246{
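
[note] Netlink dumps are restartable, so this callback can run many times for one request; parsing the ifindex once and caching it in cb->args[0] avoids re-parsing the attributes on every pass. On success the helper returns with both the RTNL and the rdev lock held, and every caller must pair it with nl80211_finish_netdev_dump(), which drops them in reverse order. A hypothetical dump handler using the pair:

    static int nl80211_dump_something(struct sk_buff *skb,
                                      struct netlink_callback *cb)
    {
            struct cfg80211_registered_device *rdev;
            struct net_device *dev;
            int err;

            err = nl80211_prepare_netdev_dump(skb, cb, &rdev, &dev);
            if (err)
                    return err;
            /* ... emit entries, using cb->args[1] as the resume index ... */
            nl80211_finish_netdev_dump(rdev);
            return skb->len;
    }
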
@@ -255,6 +308,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
255struct key_parse { 308struct key_parse {
256 struct key_params p; 309 struct key_params p;
257 int idx; 310 int idx;
311 int type;
258 bool def, defmgmt; 312 bool def, defmgmt;
259}; 313};
260 314
@@ -285,6 +339,12 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
285 if (tb[NL80211_KEY_CIPHER]) 339 if (tb[NL80211_KEY_CIPHER])
286 k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]); 340 k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]);
287 341
342 if (tb[NL80211_KEY_TYPE]) {
343 k->type = nla_get_u32(tb[NL80211_KEY_TYPE]);
344 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
345 return -EINVAL;
346 }
347
288 return 0; 348 return 0;
289} 349}
290 350
@@ -309,6 +369,12 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
309 k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; 369 k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT];
310 k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; 370 k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT];
311 371
372 if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
373 k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
374 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
375 return -EINVAL;
376 }
377
312 return 0; 378 return 0;
313} 379}
314 380
@@ -318,6 +384,7 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
318 384
319 memset(k, 0, sizeof(*k)); 385 memset(k, 0, sizeof(*k));
320 k->idx = -1; 386 k->idx = -1;
387 k->type = -1;
321 388
322 if (info->attrs[NL80211_ATTR_KEY]) 389 if (info->attrs[NL80211_ATTR_KEY])
323 err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k); 390 err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k);
@@ -382,7 +449,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
382 } else if (parse.defmgmt) 449 } else if (parse.defmgmt)
383 goto error; 450 goto error;
384 err = cfg80211_validate_key_settings(rdev, &parse.p, 451 err = cfg80211_validate_key_settings(rdev, &parse.p,
385 parse.idx, NULL); 452 parse.idx, false, NULL);
386 if (err) 453 if (err)
387 goto error; 454 goto error;
388 result->params[parse.idx].cipher = parse.p.cipher; 455 result->params[parse.idx].cipher = parse.p.cipher;
@@ -401,18 +468,17 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
401{ 468{
402 ASSERT_WDEV_LOCK(wdev); 469 ASSERT_WDEV_LOCK(wdev);
403 470
404 if (!netif_running(wdev->netdev))
405 return -ENETDOWN;
406
407 switch (wdev->iftype) { 471 switch (wdev->iftype) {
408 case NL80211_IFTYPE_AP: 472 case NL80211_IFTYPE_AP:
409 case NL80211_IFTYPE_AP_VLAN: 473 case NL80211_IFTYPE_AP_VLAN:
474 case NL80211_IFTYPE_P2P_GO:
410 break; 475 break;
411 case NL80211_IFTYPE_ADHOC: 476 case NL80211_IFTYPE_ADHOC:
412 if (!wdev->current_bss) 477 if (!wdev->current_bss)
413 return -ENOLINK; 478 return -ENOLINK;
414 break; 479 break;
415 case NL80211_IFTYPE_STATION: 480 case NL80211_IFTYPE_STATION:
481 case NL80211_IFTYPE_P2P_CLIENT:
416 if (wdev->sme_state != CFG80211_SME_CONNECTED) 482 if (wdev->sme_state != CFG80211_SME_CONNECTED)
417 return -ENOLINK; 483 return -ENOLINK;
418 break; 484 break;
@@ -437,6 +503,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
437 struct ieee80211_rate *rate; 503 struct ieee80211_rate *rate;
438 int i; 504 int i;
439 u16 ifmodes = dev->wiphy.interface_modes; 505 u16 ifmodes = dev->wiphy.interface_modes;
506 const struct ieee80211_txrx_stypes *mgmt_stypes =
507 dev->wiphy.mgmt_stypes;
440 508
441 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); 509 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
442 if (!hdr) 510 if (!hdr)
@@ -464,6 +532,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
464 NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, 532 NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
465 dev->wiphy.max_scan_ie_len); 533 dev->wiphy.max_scan_ie_len);
466 534
535 if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)
536 NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN);
537
467 NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, 538 NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES,
468 sizeof(u32) * dev->wiphy.n_cipher_suites, 539 sizeof(u32) * dev->wiphy.n_cipher_suites,
469 dev->wiphy.cipher_suites); 540 dev->wiphy.cipher_suites);
@@ -471,6 +542,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
471 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, 542 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
472 dev->wiphy.max_num_pmkids); 543 dev->wiphy.max_num_pmkids);
473 544
545 if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
546 NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
547
474 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 548 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
475 if (!nl_modes) 549 if (!nl_modes)
476 goto nla_put_failure; 550 goto nla_put_failure;
@@ -587,12 +661,13 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
587 CMD(flush_pmksa, FLUSH_PMKSA); 661 CMD(flush_pmksa, FLUSH_PMKSA);
588 CMD(remain_on_channel, REMAIN_ON_CHANNEL); 662 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
589 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); 663 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
590 CMD(action, ACTION); 664 CMD(mgmt_tx, FRAME);
591 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { 665 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
592 i++; 666 i++;
593 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); 667 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
594 } 668 }
595 CMD(set_channel, SET_CHANNEL); 669 CMD(set_channel, SET_CHANNEL);
670 CMD(set_wds_peer, SET_WDS_PEER);
596 671
597#undef CMD 672#undef CMD
598 673
@@ -608,6 +683,55 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
608 683
609 nla_nest_end(msg, nl_cmds); 684 nla_nest_end(msg, nl_cmds);
610 685
686 if (mgmt_stypes) {
687 u16 stypes;
688 struct nlattr *nl_ftypes, *nl_ifs;
689 enum nl80211_iftype ift;
690
691 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
692 if (!nl_ifs)
693 goto nla_put_failure;
694
695 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
696 nl_ftypes = nla_nest_start(msg, ift);
697 if (!nl_ftypes)
698 goto nla_put_failure;
699 i = 0;
700 stypes = mgmt_stypes[ift].tx;
701 while (stypes) {
702 if (stypes & 1)
703 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
704 (i << 4) | IEEE80211_FTYPE_MGMT);
705 stypes >>= 1;
706 i++;
707 }
708 nla_nest_end(msg, nl_ftypes);
709 }
710
711 nla_nest_end(msg, nl_ifs);
712
713 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
714 if (!nl_ifs)
715 goto nla_put_failure;
716
717 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
718 nl_ftypes = nla_nest_start(msg, ift);
719 if (!nl_ftypes)
720 goto nla_put_failure;
721 i = 0;
722 stypes = mgmt_stypes[ift].rx;
723 while (stypes) {
724 if (stypes & 1)
725 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
726 (i << 4) | IEEE80211_FTYPE_MGMT);
727 stypes >>= 1;
728 i++;
729 }
730 nla_nest_end(msg, nl_ftypes);
731 }
732 nla_nest_end(msg, nl_ifs);
733 }
734
611 return genlmsg_end(msg, hdr); 735 return genlmsg_end(msg, hdr);
612 736
613 nla_put_failure: 737 nla_put_failure:
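
[note] The advertisement reverses the stype >> 4 packing used at registration: for each interface type a nested attribute, keyed by the iftype number, carries one NL80211_ATTR_FRAME_TYPE u16 per set bit, reconstructed as (i << 4) | IEEE80211_FTYPE_MGMT. Userspace can therefore discover per iftype exactly which management subtypes it may transmit or register to receive. The bitmap decode in isolation (emit() is a placeholder for the NLA_PUT_U16 call):

    for (i = 0, stypes = mgmt_stypes[ift].tx; stypes; stypes >>= 1, i++)
            if (stypes & 1)
                    emit((i << 4) | IEEE80211_FTYPE_MGMT);
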
@@ -644,28 +768,18 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
644static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) 768static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
645{ 769{
646 struct sk_buff *msg; 770 struct sk_buff *msg;
647 struct cfg80211_registered_device *dev; 771 struct cfg80211_registered_device *dev = info->user_ptr[0];
648
649 dev = cfg80211_get_dev_from_info(info);
650 if (IS_ERR(dev))
651 return PTR_ERR(dev);
652 772
653 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 773 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
654 if (!msg) 774 if (!msg)
655 goto out_err; 775 return -ENOMEM;
656
657 if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0)
658 goto out_free;
659 776
660 cfg80211_unlock_rdev(dev); 777 if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) {
778 nlmsg_free(msg);
779 return -ENOBUFS;
780 }
661 781
662 return genlmsg_reply(msg, info); 782 return genlmsg_reply(msg, info);
663
664 out_free:
665 nlmsg_free(msg);
666 out_err:
667 cfg80211_unlock_rdev(dev);
668 return -ENOBUFS;
669} 783}
670 784
671static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { 785static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
@@ -709,7 +823,8 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
709 wdev->iftype == NL80211_IFTYPE_AP || 823 wdev->iftype == NL80211_IFTYPE_AP ||
710 wdev->iftype == NL80211_IFTYPE_WDS || 824 wdev->iftype == NL80211_IFTYPE_WDS ||
711 wdev->iftype == NL80211_IFTYPE_MESH_POINT || 825 wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
712 wdev->iftype == NL80211_IFTYPE_MONITOR; 826 wdev->iftype == NL80211_IFTYPE_MONITOR ||
827 wdev->iftype == NL80211_IFTYPE_P2P_GO;
713} 828}
714 829
715static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, 830static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
@@ -753,38 +868,48 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
753 868
754static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) 869static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
755{ 870{
756 struct cfg80211_registered_device *rdev; 871 struct cfg80211_registered_device *rdev = info->user_ptr[0];
757 struct net_device *netdev; 872 struct net_device *netdev = info->user_ptr[1];
758 int result;
759 873
760 rtnl_lock(); 874 return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
875}
761 876
762 result = get_rdev_dev_by_info_ifindex(info, &rdev, &netdev); 877static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info)
763 if (result) 878{
764 goto unlock; 879 struct cfg80211_registered_device *rdev = info->user_ptr[0];
880 struct net_device *dev = info->user_ptr[1];
881 struct wireless_dev *wdev = dev->ieee80211_ptr;
882 const u8 *bssid;
765 883
766 result = __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info); 884 if (!info->attrs[NL80211_ATTR_MAC])
885 return -EINVAL;
767 886
768 unlock: 887 if (netif_running(dev))
769 rtnl_unlock(); 888 return -EBUSY;
770 889
771 return result; 890 if (!rdev->ops->set_wds_peer)
891 return -EOPNOTSUPP;
892
893 if (wdev->iftype != NL80211_IFTYPE_WDS)
894 return -EOPNOTSUPP;
895
896 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
897 return rdev->ops->set_wds_peer(wdev->wiphy, dev, bssid);
772} 898}
773 899
900
774static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) 901static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
775{ 902{
776 struct cfg80211_registered_device *rdev; 903 struct cfg80211_registered_device *rdev;
777 struct net_device *netdev = NULL; 904 struct net_device *netdev = NULL;
778 struct wireless_dev *wdev; 905 struct wireless_dev *wdev;
779 int result, rem_txq_params = 0; 906 int result = 0, rem_txq_params = 0;
780 struct nlattr *nl_txq_params; 907 struct nlattr *nl_txq_params;
781 u32 changed; 908 u32 changed;
782 u8 retry_short = 0, retry_long = 0; 909 u8 retry_short = 0, retry_long = 0;
783 u32 frag_threshold = 0, rts_threshold = 0; 910 u32 frag_threshold = 0, rts_threshold = 0;
784 u8 coverage_class = 0; 911 u8 coverage_class = 0;
785 912
786 rtnl_lock();
787
788 /* 913 /*
789 * Try to find the wiphy and netdev. Normally this 914 * Try to find the wiphy and netdev. Normally this
790 * function shouldn't need the netdev, but this is 915 * function shouldn't need the netdev, but this is
@@ -811,8 +936,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
811 rdev = __cfg80211_rdev_from_info(info); 936 rdev = __cfg80211_rdev_from_info(info);
812 if (IS_ERR(rdev)) { 937 if (IS_ERR(rdev)) {
813 mutex_unlock(&cfg80211_mutex); 938 mutex_unlock(&cfg80211_mutex);
814 result = PTR_ERR(rdev); 939 return PTR_ERR(rdev);
815 goto unlock;
816 } 940 }
817 wdev = NULL; 941 wdev = NULL;
818 netdev = NULL; 942 netdev = NULL;
@@ -994,8 +1118,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
994 mutex_unlock(&rdev->mtx); 1118 mutex_unlock(&rdev->mtx);
995 if (netdev) 1119 if (netdev)
996 dev_put(netdev); 1120 dev_put(netdev);
997 unlock:
998 rtnl_unlock();
999 return result; 1121 return result;
1000} 1122}
1001 1123
@@ -1075,33 +1197,20 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
1075static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) 1197static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
1076{ 1198{
1077 struct sk_buff *msg; 1199 struct sk_buff *msg;
1078 struct cfg80211_registered_device *dev; 1200 struct cfg80211_registered_device *dev = info->user_ptr[0];
1079 struct net_device *netdev; 1201 struct net_device *netdev = info->user_ptr[1];
1080 int err;
1081
1082 err = get_rdev_dev_by_info_ifindex(info, &dev, &netdev);
1083 if (err)
1084 return err;
1085 1202
1086 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1203 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1087 if (!msg) 1204 if (!msg)
1088 goto out_err; 1205 return -ENOMEM;
1089 1206
1090 if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, 1207 if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0,
1091 dev, netdev) < 0) 1208 dev, netdev) < 0) {
1092 goto out_free; 1209 nlmsg_free(msg);
1093 1210 return -ENOBUFS;
1094 dev_put(netdev); 1211 }
1095 cfg80211_unlock_rdev(dev);
1096 1212
1097 return genlmsg_reply(msg, info); 1213 return genlmsg_reply(msg, info);
1098
1099 out_free:
1100 nlmsg_free(msg);
1101 out_err:
1102 dev_put(netdev);
1103 cfg80211_unlock_rdev(dev);
1104 return -ENOBUFS;
1105} 1214}
1106 1215
1107static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { 1216static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = {
@@ -1161,39 +1270,29 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
1161 1270
1162static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) 1271static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
1163{ 1272{
1164 struct cfg80211_registered_device *rdev; 1273 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1165 struct vif_params params; 1274 struct vif_params params;
1166 int err; 1275 int err;
1167 enum nl80211_iftype otype, ntype; 1276 enum nl80211_iftype otype, ntype;
1168 struct net_device *dev; 1277 struct net_device *dev = info->user_ptr[1];
1169 u32 _flags, *flags = NULL; 1278 u32 _flags, *flags = NULL;
1170 bool change = false; 1279 bool change = false;
1171 1280
1172 memset(&params, 0, sizeof(params)); 1281 memset(&params, 0, sizeof(params));
1173 1282
1174 rtnl_lock();
1175
1176 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1177 if (err)
1178 goto unlock_rtnl;
1179
1180 otype = ntype = dev->ieee80211_ptr->iftype; 1283 otype = ntype = dev->ieee80211_ptr->iftype;
1181 1284
1182 if (info->attrs[NL80211_ATTR_IFTYPE]) { 1285 if (info->attrs[NL80211_ATTR_IFTYPE]) {
1183 ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); 1286 ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
1184 if (otype != ntype) 1287 if (otype != ntype)
1185 change = true; 1288 change = true;
1186 if (ntype > NL80211_IFTYPE_MAX) { 1289 if (ntype > NL80211_IFTYPE_MAX)
1187 err = -EINVAL; 1290 return -EINVAL;
1188 goto unlock;
1189 }
1190 } 1291 }
1191 1292
1192 if (info->attrs[NL80211_ATTR_MESH_ID]) { 1293 if (info->attrs[NL80211_ATTR_MESH_ID]) {
1193 if (ntype != NL80211_IFTYPE_MESH_POINT) { 1294 if (ntype != NL80211_IFTYPE_MESH_POINT)
1194 err = -EINVAL; 1295 return -EINVAL;
1195 goto unlock;
1196 }
1197 params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); 1296 params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
1198 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); 1297 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
1199 change = true; 1298 change = true;
@@ -1204,20 +1303,18 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
1204 change = true; 1303 change = true;
1205 err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype); 1304 err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype);
1206 if (err) 1305 if (err)
1207 goto unlock; 1306 return err;
1208 } else { 1307 } else {
1209 params.use_4addr = -1; 1308 params.use_4addr = -1;
1210 } 1309 }
1211 1310
1212 if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { 1311 if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
1213 if (ntype != NL80211_IFTYPE_MONITOR) { 1312 if (ntype != NL80211_IFTYPE_MONITOR)
1214 err = -EINVAL; 1313 return -EINVAL;
1215 goto unlock;
1216 }
1217 err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], 1314 err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
1218 &_flags); 1315 &_flags);
1219 if (err) 1316 if (err)
1220 goto unlock; 1317 return err;
1221 1318
1222 flags = &_flags; 1319 flags = &_flags;
1223 change = true; 1320 change = true;
@@ -1231,17 +1328,12 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
1231 if (!err && params.use_4addr != -1) 1328 if (!err && params.use_4addr != -1)
1232 dev->ieee80211_ptr->use_4addr = params.use_4addr; 1329 dev->ieee80211_ptr->use_4addr = params.use_4addr;
1233 1330
1234 unlock:
1235 dev_put(dev);
1236 cfg80211_unlock_rdev(rdev);
1237 unlock_rtnl:
1238 rtnl_unlock();
1239 return err; 1331 return err;
1240} 1332}
1241 1333
1242static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) 1334static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1243{ 1335{
1244 struct cfg80211_registered_device *rdev; 1336 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1245 struct vif_params params; 1337 struct vif_params params;
1246 int err; 1338 int err;
1247 enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; 1339 enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
@@ -1258,19 +1350,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1258 return -EINVAL; 1350 return -EINVAL;
1259 } 1351 }
1260 1352
1261 rtnl_lock();
1262
1263 rdev = cfg80211_get_dev_from_info(info);
1264 if (IS_ERR(rdev)) {
1265 err = PTR_ERR(rdev);
1266 goto unlock_rtnl;
1267 }
1268
1269 if (!rdev->ops->add_virtual_intf || 1353 if (!rdev->ops->add_virtual_intf ||
1270 !(rdev->wiphy.interface_modes & (1 << type))) { 1354 !(rdev->wiphy.interface_modes & (1 << type)))
1271 err = -EOPNOTSUPP; 1355 return -EOPNOTSUPP;
1272 goto unlock;
1273 }
1274 1356
1275 if (type == NL80211_IFTYPE_MESH_POINT && 1357 if (type == NL80211_IFTYPE_MESH_POINT &&
1276 info->attrs[NL80211_ATTR_MESH_ID]) { 1358 info->attrs[NL80211_ATTR_MESH_ID]) {
@@ -1282,7 +1364,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1282 params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); 1364 params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
1283 err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); 1365 err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type);
1284 if (err) 1366 if (err)
1285 goto unlock; 1367 return err;
1286 } 1368 }
1287 1369
1288 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? 1370 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
@@ -1292,38 +1374,18 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1292 nla_data(info->attrs[NL80211_ATTR_IFNAME]), 1374 nla_data(info->attrs[NL80211_ATTR_IFNAME]),
1293 type, err ? NULL : &flags, &params); 1375 type, err ? NULL : &flags, &params);
1294 1376
1295 unlock:
1296 cfg80211_unlock_rdev(rdev);
1297 unlock_rtnl:
1298 rtnl_unlock();
1299 return err; 1377 return err;
1300} 1378}
1301 1379
1302static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) 1380static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
1303{ 1381{
1304 struct cfg80211_registered_device *rdev; 1382 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1305 int err; 1383 struct net_device *dev = info->user_ptr[1];
1306 struct net_device *dev;
1307 1384
1308 rtnl_lock(); 1385 if (!rdev->ops->del_virtual_intf)
1309 1386 return -EOPNOTSUPP;
1310 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1311 if (err)
1312 goto unlock_rtnl;
1313
1314 if (!rdev->ops->del_virtual_intf) {
1315 err = -EOPNOTSUPP;
1316 goto out;
1317 }
1318
1319 err = rdev->ops->del_virtual_intf(&rdev->wiphy, dev);
1320 1387
1321 out: 1388 return rdev->ops->del_virtual_intf(&rdev->wiphy, dev);
1322 cfg80211_unlock_rdev(rdev);
1323 dev_put(dev);
1324 unlock_rtnl:
1325 rtnl_unlock();
1326 return err;
1327} 1389}
1328 1390
1329struct get_key_cookie { 1391struct get_key_cookie {
@@ -1376,11 +1438,12 @@ static void get_key_callback(void *c, struct key_params *params)
1376 1438
1377static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) 1439static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1378{ 1440{
1379 struct cfg80211_registered_device *rdev; 1441 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1380 int err; 1442 int err;
1381 struct net_device *dev; 1443 struct net_device *dev = info->user_ptr[1];
1382 u8 key_idx = 0; 1444 u8 key_idx = 0;
1383 u8 *mac_addr = NULL; 1445 const u8 *mac_addr = NULL;
1446 bool pairwise;
1384 struct get_key_cookie cookie = { 1447 struct get_key_cookie cookie = {
1385 .error = 0, 1448 .error = 0,
1386 }; 1449 };
@@ -1396,30 +1459,28 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1396 if (info->attrs[NL80211_ATTR_MAC]) 1459 if (info->attrs[NL80211_ATTR_MAC])
1397 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1460 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1398 1461
1399 rtnl_lock(); 1462 pairwise = !!mac_addr;
1400 1463 if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
1401 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1464 u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
1402 if (err) 1465 if (kt >= NUM_NL80211_KEYTYPES)
1403 goto unlock_rtnl; 1466 return -EINVAL;
1404 1467 if (kt != NL80211_KEYTYPE_GROUP &&
1405 if (!rdev->ops->get_key) { 1468 kt != NL80211_KEYTYPE_PAIRWISE)
1406 err = -EOPNOTSUPP; 1469 return -EINVAL;
1407 goto out; 1470 pairwise = kt == NL80211_KEYTYPE_PAIRWISE;
1408 } 1471 }
1409 1472
1473 if (!rdev->ops->get_key)
1474 return -EOPNOTSUPP;
1475
1410 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1476 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1411 if (!msg) { 1477 if (!msg)
1412 err = -ENOMEM; 1478 return -ENOMEM;
1413 goto out;
1414 }
1415 1479
1416 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 1480 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
1417 NL80211_CMD_NEW_KEY); 1481 NL80211_CMD_NEW_KEY);
1418 1482 if (IS_ERR(hdr))
1419 if (IS_ERR(hdr)) { 1483 return PTR_ERR(hdr);
1420 err = PTR_ERR(hdr);
1421 goto free_msg;
1422 }
1423 1484
1424 cookie.msg = msg; 1485 cookie.msg = msg;
1425 cookie.idx = key_idx; 1486 cookie.idx = key_idx;
@@ -1429,8 +1490,12 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1429 if (mac_addr) 1490 if (mac_addr)
1430 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); 1491 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
1431 1492
1432 err = rdev->ops->get_key(&rdev->wiphy, dev, key_idx, mac_addr, 1493 if (pairwise && mac_addr &&
1433 &cookie, get_key_callback); 1494 !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
1495 return -ENOENT;
1496
1497 err = rdev->ops->get_key(&rdev->wiphy, dev, key_idx, pairwise,
1498 mac_addr, &cookie, get_key_callback);
1434 1499
1435 if (err) 1500 if (err)
1436 goto free_msg; 1501 goto free_msg;
@@ -1439,28 +1504,21 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1439 goto nla_put_failure; 1504 goto nla_put_failure;
1440 1505
1441 genlmsg_end(msg, hdr); 1506 genlmsg_end(msg, hdr);
1442 err = genlmsg_reply(msg, info); 1507 return genlmsg_reply(msg, info);
1443 goto out;
1444 1508
1445 nla_put_failure: 1509 nla_put_failure:
1446 err = -ENOBUFS; 1510 err = -ENOBUFS;
1447 free_msg: 1511 free_msg:
1448 nlmsg_free(msg); 1512 nlmsg_free(msg);
1449 out:
1450 cfg80211_unlock_rdev(rdev);
1451 dev_put(dev);
1452 unlock_rtnl:
1453 rtnl_unlock();
1454
1455 return err; 1513 return err;
1456} 1514}
1457 1515
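[Editor's note: the get-key hunk above and the new/del-key hunks below repeat one rule worth calling out: when userspace omits NL80211_ATTR_KEY_TYPE, the presence of a peer MAC address decides the type, and only the pairwise/group types are accepted "for now". A condensed sketch of just that decision; example_resolve_key_type() is an illustrative name, not a real nl80211 helper:

	/* Sketch: default and validate the key type the way these hunks do.
	 * Returns the resolved NL80211_KEYTYPE_* value, or a negative errno.
	 */
	static int example_resolve_key_type(const u8 *mac_addr, int keytype)
	{
		if (keytype == -1)		/* attribute absent: infer */
			return mac_addr ? NL80211_KEYTYPE_PAIRWISE
					: NL80211_KEYTYPE_GROUP;

		if (keytype != NL80211_KEYTYPE_PAIRWISE &&
		    keytype != NL80211_KEYTYPE_GROUP)
			return -EINVAL;	/* e.g. PEERKEY: not supported yet */

		return keytype;
	}
]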
1458static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) 1516static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
1459{ 1517{
1460 struct cfg80211_registered_device *rdev; 1518 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1461 struct key_parse key; 1519 struct key_parse key;
1462 int err; 1520 int err;
1463 struct net_device *dev; 1521 struct net_device *dev = info->user_ptr[1];
1464 int (*func)(struct wiphy *wiphy, struct net_device *netdev, 1522 int (*func)(struct wiphy *wiphy, struct net_device *netdev,
1465 u8 key_index); 1523 u8 key_index);
1466 1524
@@ -1475,21 +1533,13 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
1475 if (!key.def && !key.defmgmt) 1533 if (!key.def && !key.defmgmt)
1476 return -EINVAL; 1534 return -EINVAL;
1477 1535
1478 rtnl_lock();
1479
1480 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1481 if (err)
1482 goto unlock_rtnl;
1483
1484 if (key.def) 1536 if (key.def)
1485 func = rdev->ops->set_default_key; 1537 func = rdev->ops->set_default_key;
1486 else 1538 else
1487 func = rdev->ops->set_default_mgmt_key; 1539 func = rdev->ops->set_default_mgmt_key;
1488 1540
1489 if (!func) { 1541 if (!func)
1490 err = -EOPNOTSUPP; 1542 return -EOPNOTSUPP;
1491 goto out;
1492 }
1493 1543
1494 wdev_lock(dev->ieee80211_ptr); 1544 wdev_lock(dev->ieee80211_ptr);
1495 err = nl80211_key_allowed(dev->ieee80211_ptr); 1545 err = nl80211_key_allowed(dev->ieee80211_ptr);
@@ -1506,23 +1556,16 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
1506#endif 1556#endif
1507 wdev_unlock(dev->ieee80211_ptr); 1557 wdev_unlock(dev->ieee80211_ptr);
1508 1558
1509 out:
1510 cfg80211_unlock_rdev(rdev);
1511 dev_put(dev);
1512
1513 unlock_rtnl:
1514 rtnl_unlock();
1515
1516 return err; 1559 return err;
1517} 1560}
1518 1561
1519static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) 1562static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
1520{ 1563{
1521 struct cfg80211_registered_device *rdev; 1564 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1522 int err; 1565 int err;
1523 struct net_device *dev; 1566 struct net_device *dev = info->user_ptr[1];
1524 struct key_parse key; 1567 struct key_parse key;
1525 u8 *mac_addr = NULL; 1568 const u8 *mac_addr = NULL;
1526 1569
1527 err = nl80211_parse_key(info, &key); 1570 err = nl80211_parse_key(info, &key);
1528 if (err) 1571 if (err)
@@ -1534,43 +1577,42 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
1534 if (info->attrs[NL80211_ATTR_MAC]) 1577 if (info->attrs[NL80211_ATTR_MAC])
1535 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1578 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1536 1579
1537 rtnl_lock(); 1580 if (key.type == -1) {
1581 if (mac_addr)
1582 key.type = NL80211_KEYTYPE_PAIRWISE;
1583 else
1584 key.type = NL80211_KEYTYPE_GROUP;
1585 }
1538 1586
1539 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1587 /* for now */
1540 if (err) 1588 if (key.type != NL80211_KEYTYPE_PAIRWISE &&
1541 goto unlock_rtnl; 1589 key.type != NL80211_KEYTYPE_GROUP)
1590 return -EINVAL;
1542 1591
1543 if (!rdev->ops->add_key) { 1592 if (!rdev->ops->add_key)
1544 err = -EOPNOTSUPP; 1593 return -EOPNOTSUPP;
1545 goto out;
1546 }
1547 1594
1548 if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, mac_addr)) { 1595 if (cfg80211_validate_key_settings(rdev, &key.p, key.idx,
1549 err = -EINVAL; 1596 key.type == NL80211_KEYTYPE_PAIRWISE,
1550 goto out; 1597 mac_addr))
1551 } 1598 return -EINVAL;
1552 1599
1553 wdev_lock(dev->ieee80211_ptr); 1600 wdev_lock(dev->ieee80211_ptr);
1554 err = nl80211_key_allowed(dev->ieee80211_ptr); 1601 err = nl80211_key_allowed(dev->ieee80211_ptr);
1555 if (!err) 1602 if (!err)
1556 err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, 1603 err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx,
1604 key.type == NL80211_KEYTYPE_PAIRWISE,
1557 mac_addr, &key.p); 1605 mac_addr, &key.p);
1558 wdev_unlock(dev->ieee80211_ptr); 1606 wdev_unlock(dev->ieee80211_ptr);
1559 1607
1560 out:
1561 cfg80211_unlock_rdev(rdev);
1562 dev_put(dev);
1563 unlock_rtnl:
1564 rtnl_unlock();
1565
1566 return err; 1608 return err;
1567} 1609}
1568 1610
1569static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) 1611static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
1570{ 1612{
1571 struct cfg80211_registered_device *rdev; 1613 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1572 int err; 1614 int err;
1573 struct net_device *dev; 1615 struct net_device *dev = info->user_ptr[1];
1574 u8 *mac_addr = NULL; 1616 u8 *mac_addr = NULL;
1575 struct key_parse key; 1617 struct key_parse key;
1576 1618
@@ -1581,21 +1623,32 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
1581 if (info->attrs[NL80211_ATTR_MAC]) 1623 if (info->attrs[NL80211_ATTR_MAC])
1582 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1624 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1583 1625
1584 rtnl_lock(); 1626 if (key.type == -1) {
1627 if (mac_addr)
1628 key.type = NL80211_KEYTYPE_PAIRWISE;
1629 else
1630 key.type = NL80211_KEYTYPE_GROUP;
1631 }
1585 1632
1586 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1633 /* for now */
1587 if (err) 1634 if (key.type != NL80211_KEYTYPE_PAIRWISE &&
1588 goto unlock_rtnl; 1635 key.type != NL80211_KEYTYPE_GROUP)
1636 return -EINVAL;
1589 1637
1590 if (!rdev->ops->del_key) { 1638 if (!rdev->ops->del_key)
1591 err = -EOPNOTSUPP; 1639 return -EOPNOTSUPP;
1592 goto out;
1593 }
1594 1640
1595 wdev_lock(dev->ieee80211_ptr); 1641 wdev_lock(dev->ieee80211_ptr);
1596 err = nl80211_key_allowed(dev->ieee80211_ptr); 1642 err = nl80211_key_allowed(dev->ieee80211_ptr);
1643
1644 if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr &&
1645 !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
1646 err = -ENOENT;
1647
1597 if (!err) 1648 if (!err)
1598 err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); 1649 err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx,
1650 key.type == NL80211_KEYTYPE_PAIRWISE,
1651 mac_addr);
1599 1652
1600#ifdef CONFIG_CFG80211_WEXT 1653#ifdef CONFIG_CFG80211_WEXT
1601 if (!err) { 1654 if (!err) {
@@ -1607,13 +1660,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
1607#endif 1660#endif
1608 wdev_unlock(dev->ieee80211_ptr); 1661 wdev_unlock(dev->ieee80211_ptr);
1609 1662
1610 out:
1611 cfg80211_unlock_rdev(rdev);
1612 dev_put(dev);
1613
1614 unlock_rtnl:
1615 rtnl_unlock();
1616
1617 return err; 1663 return err;
1618} 1664}
1619 1665
@@ -1621,35 +1667,25 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1621{ 1667{
1622 int (*call)(struct wiphy *wiphy, struct net_device *dev, 1668 int (*call)(struct wiphy *wiphy, struct net_device *dev,
1623 struct beacon_parameters *info); 1669 struct beacon_parameters *info);
1624 struct cfg80211_registered_device *rdev; 1670 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1625 int err; 1671 struct net_device *dev = info->user_ptr[1];
1626 struct net_device *dev;
1627 struct beacon_parameters params; 1672 struct beacon_parameters params;
1628 int haveinfo = 0; 1673 int haveinfo = 0;
1629 1674
1630 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL])) 1675 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]))
1631 return -EINVAL; 1676 return -EINVAL;
1632 1677
1633 rtnl_lock(); 1678 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1634 1679 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
1635 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1680 return -EOPNOTSUPP;
1636 if (err)
1637 goto unlock_rtnl;
1638
1639 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
1640 err = -EOPNOTSUPP;
1641 goto out;
1642 }
1643 1681
1644 switch (info->genlhdr->cmd) { 1682 switch (info->genlhdr->cmd) {
1645 case NL80211_CMD_NEW_BEACON: 1683 case NL80211_CMD_NEW_BEACON:
1646 /* these are required for NEW_BEACON */ 1684 /* these are required for NEW_BEACON */
1647 if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || 1685 if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] ||
1648 !info->attrs[NL80211_ATTR_DTIM_PERIOD] || 1686 !info->attrs[NL80211_ATTR_DTIM_PERIOD] ||
1649 !info->attrs[NL80211_ATTR_BEACON_HEAD]) { 1687 !info->attrs[NL80211_ATTR_BEACON_HEAD])
1650 err = -EINVAL; 1688 return -EINVAL;
1651 goto out;
1652 }
1653 1689
1654 call = rdev->ops->add_beacon; 1690 call = rdev->ops->add_beacon;
1655 break; 1691 break;
@@ -1658,14 +1694,11 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1658 break; 1694 break;
1659 default: 1695 default:
1660 WARN_ON(1); 1696 WARN_ON(1);
1661 err = -EOPNOTSUPP; 1697 return -EOPNOTSUPP;
1662 goto out;
1663 } 1698 }
1664 1699
1665 if (!call) { 1700 if (!call)
1666 err = -EOPNOTSUPP; 1701 return -EOPNOTSUPP;
1667 goto out;
1668 }
1669 1702
1670 memset(&params, 0, sizeof(params)); 1703 memset(&params, 0, sizeof(params));
1671 1704
@@ -1695,52 +1728,25 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1695 haveinfo = 1; 1728 haveinfo = 1;
1696 } 1729 }
1697 1730
1698 if (!haveinfo) { 1731 if (!haveinfo)
1699 err = -EINVAL; 1732 return -EINVAL;
1700 goto out;
1701 }
1702
1703 err = call(&rdev->wiphy, dev, &params);
1704
1705 out:
1706 cfg80211_unlock_rdev(rdev);
1707 dev_put(dev);
1708 unlock_rtnl:
1709 rtnl_unlock();
1710 1733
1711 return err; 1734 return call(&rdev->wiphy, dev, &params);
1712} 1735}
1713 1736
1714static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) 1737static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
1715{ 1738{
1716 struct cfg80211_registered_device *rdev; 1739 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1717 int err; 1740 struct net_device *dev = info->user_ptr[1];
1718 struct net_device *dev;
1719 1741
1720 rtnl_lock(); 1742 if (!rdev->ops->del_beacon)
1721 1743 return -EOPNOTSUPP;
1722 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1723 if (err)
1724 goto unlock_rtnl;
1725
1726 if (!rdev->ops->del_beacon) {
1727 err = -EOPNOTSUPP;
1728 goto out;
1729 }
1730
1731 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
1732 err = -EOPNOTSUPP;
1733 goto out;
1734 }
1735 err = rdev->ops->del_beacon(&rdev->wiphy, dev);
1736 1744
1737 out: 1745 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1738 cfg80211_unlock_rdev(rdev); 1746 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
1739 dev_put(dev); 1747 return -EOPNOTSUPP;
1740 unlock_rtnl:
1741 rtnl_unlock();
1742 1748
1743 return err; 1749 return rdev->ops->del_beacon(&rdev->wiphy, dev);
1744} 1750}
1745 1751
1746static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { 1752static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
@@ -1861,6 +1867,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
1861 if (sinfo->filled & STATION_INFO_TX_PACKETS) 1867 if (sinfo->filled & STATION_INFO_TX_PACKETS)
1862 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS, 1868 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS,
1863 sinfo->tx_packets); 1869 sinfo->tx_packets);
1870 if (sinfo->filled & STATION_INFO_TX_RETRIES)
1871 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_RETRIES,
1872 sinfo->tx_retries);
1873 if (sinfo->filled & STATION_INFO_TX_FAILED)
1874 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED,
1875 sinfo->tx_failed);
1864 nla_nest_end(msg, sinfoattr); 1876 nla_nest_end(msg, sinfoattr);
1865 1877
1866 return genlmsg_end(msg, hdr); 1878 return genlmsg_end(msg, hdr);
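[Editor's note: the two attributes added above only reach userspace if the driver sets the matching bits in sinfo->filled. A hypothetical driver-side fill from a get_station() callback; struct example_sta_stats is an assumed private counter source, not a real driver's:

	struct example_sta_stats {
		u32 tx_retries;		/* retransmitted MPDUs */
		u32 tx_failed;		/* MPDUs never acknowledged */
	};

	/* Sketch: report the new TX retry/failure counters. */
	static void example_fill_tx_stats(struct station_info *sinfo,
					  const struct example_sta_stats *stats)
	{
		sinfo->filled |= STATION_INFO_TX_RETRIES |
				 STATION_INFO_TX_FAILED;
		sinfo->tx_retries = stats->tx_retries;
		sinfo->tx_failed = stats->tx_failed;
	}
]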
@@ -1877,28 +1889,12 @@ static int nl80211_dump_station(struct sk_buff *skb,
1877 struct cfg80211_registered_device *dev; 1889 struct cfg80211_registered_device *dev;
1878 struct net_device *netdev; 1890 struct net_device *netdev;
1879 u8 mac_addr[ETH_ALEN]; 1891 u8 mac_addr[ETH_ALEN];
1880 int ifidx = cb->args[0];
1881 int sta_idx = cb->args[1]; 1892 int sta_idx = cb->args[1];
1882 int err; 1893 int err;
1883 1894
1884 if (!ifidx) 1895 err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
1885 ifidx = nl80211_get_ifidx(cb); 1896 if (err)
1886 if (ifidx < 0) 1897 return err;
1887 return ifidx;
1888
1889 rtnl_lock();
1890
1891 netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
1892 if (!netdev) {
1893 err = -ENODEV;
1894 goto out_rtnl;
1895 }
1896
1897 dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
1898 if (IS_ERR(dev)) {
1899 err = PTR_ERR(dev);
1900 goto out_rtnl;
1901 }
1902 1898
1903 if (!dev->ops->dump_station) { 1899 if (!dev->ops->dump_station) {
1904 err = -EOPNOTSUPP; 1900 err = -EOPNOTSUPP;
@@ -1928,21 +1924,19 @@ static int nl80211_dump_station(struct sk_buff *skb,
1928 cb->args[1] = sta_idx; 1924 cb->args[1] = sta_idx;
1929 err = skb->len; 1925 err = skb->len;
1930 out_err: 1926 out_err:
1931 cfg80211_unlock_rdev(dev); 1927 nl80211_finish_netdev_dump(dev);
1932 out_rtnl:
1933 rtnl_unlock();
1934 1928
1935 return err; 1929 return err;
1936} 1930}
1937 1931
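[Editor's note: this and the later dump handlers (mesh paths, scan results, surveys) all switch to a shared prepare/finish pair whose bodies are outside this excerpt. From the call sites and the code they replace, the shape is clear: prepare resolves the ifindex cached in cb->args[0], takes the RTNL and the rdev lock, and hands back both pointers; finish undoes it. A sketch under those assumptions, not the verbatim implementation:

	static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
					       struct netlink_callback *cb,
					       struct cfg80211_registered_device **rdev,
					       struct net_device **dev)
	{
		int ifidx = cb->args[0];

		if (!ifidx)
			ifidx = nl80211_get_ifidx(cb);	/* parse attrs once */
		if (ifidx < 0)
			return ifidx;
		cb->args[0] = ifidx;	/* cache for subsequent dump calls */

		rtnl_lock();
		*dev = __dev_get_by_index(sock_net(skb->sk), ifidx);
		if (!*dev) {
			rtnl_unlock();
			return -ENODEV;
		}
		*rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
		if (IS_ERR(*rdev)) {
			rtnl_unlock();
			return PTR_ERR(*rdev);
		}
		return 0;
	}

	static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev)
	{
		cfg80211_unlock_rdev(rdev);
		rtnl_unlock();
	}
]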
1938static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) 1932static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
1939{ 1933{
1940 struct cfg80211_registered_device *rdev; 1934 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1941 int err; 1935 struct net_device *dev = info->user_ptr[1];
1942 struct net_device *dev;
1943 struct station_info sinfo; 1936 struct station_info sinfo;
1944 struct sk_buff *msg; 1937 struct sk_buff *msg;
1945 u8 *mac_addr = NULL; 1938 u8 *mac_addr = NULL;
1939 int err;
1946 1940
1947 memset(&sinfo, 0, sizeof(sinfo)); 1941 memset(&sinfo, 0, sizeof(sinfo));
1948 1942
@@ -1951,41 +1945,24 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
1951 1945
1952 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1946 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1953 1947
1954 rtnl_lock(); 1948 if (!rdev->ops->get_station)
1955 1949 return -EOPNOTSUPP;
1956 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1957 if (err)
1958 goto out_rtnl;
1959
1960 if (!rdev->ops->get_station) {
1961 err = -EOPNOTSUPP;
1962 goto out;
1963 }
1964 1950
1965 err = rdev->ops->get_station(&rdev->wiphy, dev, mac_addr, &sinfo); 1951 err = rdev->ops->get_station(&rdev->wiphy, dev, mac_addr, &sinfo);
1966 if (err) 1952 if (err)
1967 goto out; 1953 return err;
1968 1954
1969 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1955 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1970 if (!msg) 1956 if (!msg)
1971 goto out; 1957 return -ENOMEM;
1972 1958
1973 if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, 1959 if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0,
1974 dev, mac_addr, &sinfo) < 0) 1960 dev, mac_addr, &sinfo) < 0) {
1975 goto out_free; 1961 nlmsg_free(msg);
1976 1962 return -ENOBUFS;
1977 err = genlmsg_reply(msg, info); 1963 }
1978 goto out;
1979
1980 out_free:
1981 nlmsg_free(msg);
1982 out:
1983 cfg80211_unlock_rdev(rdev);
1984 dev_put(dev);
1985 out_rtnl:
1986 rtnl_unlock();
1987 1964
1988 return err; 1965 return genlmsg_reply(msg, info);
1989} 1966}
1990 1967
1991/* 1968/*
@@ -2015,9 +1992,9 @@ static int get_vlan(struct genl_info *info,
2015 1992
2016static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) 1993static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2017{ 1994{
2018 struct cfg80211_registered_device *rdev; 1995 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2019 int err; 1996 int err;
2020 struct net_device *dev; 1997 struct net_device *dev = info->user_ptr[1];
2021 struct station_parameters params; 1998 struct station_parameters params;
2022 u8 *mac_addr = NULL; 1999 u8 *mac_addr = NULL;
2023 2000
@@ -2055,12 +2032,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2055 params.plink_action = 2032 params.plink_action =
2056 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); 2033 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
2057 2034
2058 rtnl_lock();
2059
2060 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2061 if (err)
2062 goto out_rtnl;
2063
2064 err = get_vlan(info, rdev, &params.vlan); 2035 err = get_vlan(info, rdev, &params.vlan);
2065 if (err) 2036 if (err)
2066 goto out; 2037 goto out;
@@ -2071,10 +2042,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2071 switch (dev->ieee80211_ptr->iftype) { 2042 switch (dev->ieee80211_ptr->iftype) {
2072 case NL80211_IFTYPE_AP: 2043 case NL80211_IFTYPE_AP:
2073 case NL80211_IFTYPE_AP_VLAN: 2044 case NL80211_IFTYPE_AP_VLAN:
2045 case NL80211_IFTYPE_P2P_GO:
2074 /* disallow mesh-specific things */ 2046 /* disallow mesh-specific things */
2075 if (params.plink_action) 2047 if (params.plink_action)
2076 err = -EINVAL; 2048 err = -EINVAL;
2077 break; 2049 break;
2050 case NL80211_IFTYPE_P2P_CLIENT:
2078 case NL80211_IFTYPE_STATION: 2051 case NL80211_IFTYPE_STATION:
2079 /* disallow everything but AUTHORIZED flag */ 2052 /* disallow everything but AUTHORIZED flag */
2080 if (params.plink_action) 2053 if (params.plink_action)
@@ -2120,19 +2093,15 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2120 out: 2093 out:
2121 if (params.vlan) 2094 if (params.vlan)
2122 dev_put(params.vlan); 2095 dev_put(params.vlan);
2123 cfg80211_unlock_rdev(rdev);
2124 dev_put(dev);
2125 out_rtnl:
2126 rtnl_unlock();
2127 2096
2128 return err; 2097 return err;
2129} 2098}
2130 2099
2131static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) 2100static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2132{ 2101{
2133 struct cfg80211_registered_device *rdev; 2102 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2134 int err; 2103 int err;
2135 struct net_device *dev; 2104 struct net_device *dev = info->user_ptr[1];
2136 struct station_parameters params; 2105 struct station_parameters params;
2137 u8 *mac_addr = NULL; 2106 u8 *mac_addr = NULL;
2138 2107
@@ -2169,17 +2138,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2169 if (parse_station_flags(info, &params)) 2138 if (parse_station_flags(info, &params))
2170 return -EINVAL; 2139 return -EINVAL;
2171 2140
2172 rtnl_lock();
2173
2174 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2175 if (err)
2176 goto out_rtnl;
2177
2178 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2141 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2179 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { 2142 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2180 err = -EINVAL; 2143 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
2181 goto out; 2144 return -EINVAL;
2182 }
2183 2145
2184 err = get_vlan(info, rdev, &params.vlan); 2146 err = get_vlan(info, rdev, &params.vlan);
2185 if (err) 2147 if (err)
@@ -2193,61 +2155,33 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2193 goto out; 2155 goto out;
2194 } 2156 }
2195 2157
2196 if (!netif_running(dev)) {
2197 err = -ENETDOWN;
2198 goto out;
2199 }
2200
2201 err = rdev->ops->add_station(&rdev->wiphy, dev, mac_addr, &params); 2158 err = rdev->ops->add_station(&rdev->wiphy, dev, mac_addr, &params);
2202 2159
2203 out: 2160 out:
2204 if (params.vlan) 2161 if (params.vlan)
2205 dev_put(params.vlan); 2162 dev_put(params.vlan);
2206 cfg80211_unlock_rdev(rdev);
2207 dev_put(dev);
2208 out_rtnl:
2209 rtnl_unlock();
2210
2211 return err; 2163 return err;
2212} 2164}
2213 2165
2214static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) 2166static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
2215{ 2167{
2216 struct cfg80211_registered_device *rdev; 2168 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2217 int err; 2169 struct net_device *dev = info->user_ptr[1];
2218 struct net_device *dev;
2219 u8 *mac_addr = NULL; 2170 u8 *mac_addr = NULL;
2220 2171
2221 if (info->attrs[NL80211_ATTR_MAC]) 2172 if (info->attrs[NL80211_ATTR_MAC])
2222 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 2173 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
2223 2174
2224 rtnl_lock();
2225
2226 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2227 if (err)
2228 goto out_rtnl;
2229
2230 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2175 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2231 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && 2176 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2232 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { 2177 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
2233 err = -EINVAL; 2178 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
2234 goto out; 2179 return -EINVAL;
2235 }
2236
2237 if (!rdev->ops->del_station) {
2238 err = -EOPNOTSUPP;
2239 goto out;
2240 }
2241
2242 err = rdev->ops->del_station(&rdev->wiphy, dev, mac_addr);
2243 2180
2244 out: 2181 if (!rdev->ops->del_station)
2245 cfg80211_unlock_rdev(rdev); 2182 return -EOPNOTSUPP;
2246 dev_put(dev);
2247 out_rtnl:
2248 rtnl_unlock();
2249 2183
2250 return err; 2184 return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr);
2251} 2185}
2252 2186
2253static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, 2187static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
@@ -2310,28 +2244,12 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
2310 struct net_device *netdev; 2244 struct net_device *netdev;
2311 u8 dst[ETH_ALEN]; 2245 u8 dst[ETH_ALEN];
2312 u8 next_hop[ETH_ALEN]; 2246 u8 next_hop[ETH_ALEN];
2313 int ifidx = cb->args[0];
2314 int path_idx = cb->args[1]; 2247 int path_idx = cb->args[1];
2315 int err; 2248 int err;
2316 2249
2317 if (!ifidx) 2250 err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
2318 ifidx = nl80211_get_ifidx(cb); 2251 if (err)
2319 if (ifidx < 0) 2252 return err;
2320 return ifidx;
2321
2322 rtnl_lock();
2323
2324 netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
2325 if (!netdev) {
2326 err = -ENODEV;
2327 goto out_rtnl;
2328 }
2329
2330 dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
2331 if (IS_ERR(dev)) {
2332 err = PTR_ERR(dev);
2333 goto out_rtnl;
2334 }
2335 2253
2336 if (!dev->ops->dump_mpath) { 2254 if (!dev->ops->dump_mpath) {
2337 err = -EOPNOTSUPP; 2255 err = -EOPNOTSUPP;
@@ -2365,18 +2283,15 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
2365 cb->args[1] = path_idx; 2283 cb->args[1] = path_idx;
2366 err = skb->len; 2284 err = skb->len;
2367 out_err: 2285 out_err:
2368 cfg80211_unlock_rdev(dev); 2286 nl80211_finish_netdev_dump(dev);
2369 out_rtnl:
2370 rtnl_unlock();
2371
2372 return err; 2287 return err;
2373} 2288}
2374 2289
2375static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) 2290static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
2376{ 2291{
2377 struct cfg80211_registered_device *rdev; 2292 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2378 int err; 2293 int err;
2379 struct net_device *dev; 2294 struct net_device *dev = info->user_ptr[1];
2380 struct mpath_info pinfo; 2295 struct mpath_info pinfo;
2381 struct sk_buff *msg; 2296 struct sk_buff *msg;
2382 u8 *dst = NULL; 2297 u8 *dst = NULL;
@@ -2389,53 +2304,33 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
2389 2304
2390 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2305 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2391 2306
2392 rtnl_lock(); 2307 if (!rdev->ops->get_mpath)
2393 2308 return -EOPNOTSUPP;
2394 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2395 if (err)
2396 goto out_rtnl;
2397
2398 if (!rdev->ops->get_mpath) {
2399 err = -EOPNOTSUPP;
2400 goto out;
2401 }
2402 2309
2403 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { 2310 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
2404 err = -EOPNOTSUPP; 2311 return -EOPNOTSUPP;
2405 goto out;
2406 }
2407 2312
2408 err = rdev->ops->get_mpath(&rdev->wiphy, dev, dst, next_hop, &pinfo); 2313 err = rdev->ops->get_mpath(&rdev->wiphy, dev, dst, next_hop, &pinfo);
2409 if (err) 2314 if (err)
2410 goto out; 2315 return err;
2411 2316
2412 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2317 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2413 if (!msg) 2318 if (!msg)
2414 goto out; 2319 return -ENOMEM;
2415 2320
2416 if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, 2321 if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0,
2417 dev, dst, next_hop, &pinfo) < 0) 2322 dev, dst, next_hop, &pinfo) < 0) {
2418 goto out_free; 2323 nlmsg_free(msg);
2419 2324 return -ENOBUFS;
2420 err = genlmsg_reply(msg, info); 2325 }
2421 goto out;
2422
2423 out_free:
2424 nlmsg_free(msg);
2425 out:
2426 cfg80211_unlock_rdev(rdev);
2427 dev_put(dev);
2428 out_rtnl:
2429 rtnl_unlock();
2430 2326
2431 return err; 2327 return genlmsg_reply(msg, info);
2432} 2328}
2433 2329
2434static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) 2330static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info)
2435{ 2331{
2436 struct cfg80211_registered_device *rdev; 2332 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2437 int err; 2333 struct net_device *dev = info->user_ptr[1];
2438 struct net_device *dev;
2439 u8 *dst = NULL; 2334 u8 *dst = NULL;
2440 u8 *next_hop = NULL; 2335 u8 *next_hop = NULL;
2441 2336
@@ -2448,42 +2343,19 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info)
2448 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2343 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2449 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); 2344 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
2450 2345
2451 rtnl_lock(); 2346 if (!rdev->ops->change_mpath)
2452 2347 return -EOPNOTSUPP;
2453 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2454 if (err)
2455 goto out_rtnl;
2456
2457 if (!rdev->ops->change_mpath) {
2458 err = -EOPNOTSUPP;
2459 goto out;
2460 }
2461
2462 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
2463 err = -EOPNOTSUPP;
2464 goto out;
2465 }
2466
2467 if (!netif_running(dev)) {
2468 err = -ENETDOWN;
2469 goto out;
2470 }
2471
2472 err = rdev->ops->change_mpath(&rdev->wiphy, dev, dst, next_hop);
2473 2348
2474 out: 2349 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
2475 cfg80211_unlock_rdev(rdev); 2350 return -EOPNOTSUPP;
2476 dev_put(dev);
2477 out_rtnl:
2478 rtnl_unlock();
2479 2351
2480 return err; 2352 return rdev->ops->change_mpath(&rdev->wiphy, dev, dst, next_hop);
2481} 2353}
2354
2482static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) 2355static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
2483{ 2356{
2484 struct cfg80211_registered_device *rdev; 2357 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2485 int err; 2358 struct net_device *dev = info->user_ptr[1];
2486 struct net_device *dev;
2487 u8 *dst = NULL; 2359 u8 *dst = NULL;
2488 u8 *next_hop = NULL; 2360 u8 *next_hop = NULL;
2489 2361
@@ -2496,75 +2368,34 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
2496 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2368 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2497 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); 2369 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
2498 2370
2499 rtnl_lock(); 2371 if (!rdev->ops->add_mpath)
2500 2372 return -EOPNOTSUPP;
2501 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2502 if (err)
2503 goto out_rtnl;
2504
2505 if (!rdev->ops->add_mpath) {
2506 err = -EOPNOTSUPP;
2507 goto out;
2508 }
2509
2510 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
2511 err = -EOPNOTSUPP;
2512 goto out;
2513 }
2514
2515 if (!netif_running(dev)) {
2516 err = -ENETDOWN;
2517 goto out;
2518 }
2519
2520 err = rdev->ops->add_mpath(&rdev->wiphy, dev, dst, next_hop);
2521 2373
2522 out: 2374 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
2523 cfg80211_unlock_rdev(rdev); 2375 return -EOPNOTSUPP;
2524 dev_put(dev);
2525 out_rtnl:
2526 rtnl_unlock();
2527 2376
2528 return err; 2377 return rdev->ops->add_mpath(&rdev->wiphy, dev, dst, next_hop);
2529} 2378}
2530 2379
2531static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) 2380static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info)
2532{ 2381{
2533 struct cfg80211_registered_device *rdev; 2382 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2534 int err; 2383 struct net_device *dev = info->user_ptr[1];
2535 struct net_device *dev;
2536 u8 *dst = NULL; 2384 u8 *dst = NULL;
2537 2385
2538 if (info->attrs[NL80211_ATTR_MAC]) 2386 if (info->attrs[NL80211_ATTR_MAC])
2539 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2387 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2540 2388
2541 rtnl_lock(); 2389 if (!rdev->ops->del_mpath)
2542 2390 return -EOPNOTSUPP;
2543 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2544 if (err)
2545 goto out_rtnl;
2546
2547 if (!rdev->ops->del_mpath) {
2548 err = -EOPNOTSUPP;
2549 goto out;
2550 }
2551
2552 err = rdev->ops->del_mpath(&rdev->wiphy, dev, dst);
2553
2554 out:
2555 cfg80211_unlock_rdev(rdev);
2556 dev_put(dev);
2557 out_rtnl:
2558 rtnl_unlock();
2559 2391
2560 return err; 2392 return rdev->ops->del_mpath(&rdev->wiphy, dev, dst);
2561} 2393}
2562 2394
2563static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) 2395static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2564{ 2396{
2565 struct cfg80211_registered_device *rdev; 2397 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2566 int err; 2398 struct net_device *dev = info->user_ptr[1];
2567 struct net_device *dev;
2568 struct bss_parameters params; 2399 struct bss_parameters params;
2569 2400
2570 memset(&params, 0, sizeof(params)); 2401 memset(&params, 0, sizeof(params));
@@ -2592,31 +2423,14 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2592 if (info->attrs[NL80211_ATTR_AP_ISOLATE]) 2423 if (info->attrs[NL80211_ATTR_AP_ISOLATE])
2593 params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]); 2424 params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
2594 2425
2595 rtnl_lock(); 2426 if (!rdev->ops->change_bss)
2596 2427 return -EOPNOTSUPP;
2597 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2598 if (err)
2599 goto out_rtnl;
2600
2601 if (!rdev->ops->change_bss) {
2602 err = -EOPNOTSUPP;
2603 goto out;
2604 }
2605
2606 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
2607 err = -EOPNOTSUPP;
2608 goto out;
2609 }
2610
2611 err = rdev->ops->change_bss(&rdev->wiphy, dev, &params);
2612 2428
2613 out: 2429 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2614 cfg80211_unlock_rdev(rdev); 2430 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
2615 dev_put(dev); 2431 return -EOPNOTSUPP;
2616 out_rtnl:
2617 rtnl_unlock();
2618 2432
2619 return err; 2433 return rdev->ops->change_bss(&rdev->wiphy, dev, &params);
2620} 2434}
2621 2435
2622static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { 2436static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
@@ -2695,37 +2509,26 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
2695static int nl80211_get_mesh_params(struct sk_buff *skb, 2509static int nl80211_get_mesh_params(struct sk_buff *skb,
2696 struct genl_info *info) 2510 struct genl_info *info)
2697{ 2511{
2698 struct cfg80211_registered_device *rdev; 2512 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2699 struct mesh_config cur_params; 2513 struct mesh_config cur_params;
2700 int err; 2514 int err;
2701 struct net_device *dev; 2515 struct net_device *dev = info->user_ptr[1];
2702 void *hdr; 2516 void *hdr;
2703 struct nlattr *pinfoattr; 2517 struct nlattr *pinfoattr;
2704 struct sk_buff *msg; 2518 struct sk_buff *msg;
2705 2519
2706 rtnl_lock(); 2520 if (!rdev->ops->get_mesh_params)
2707 2521 return -EOPNOTSUPP;
2708 /* Look up our device */
2709 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2710 if (err)
2711 goto out_rtnl;
2712
2713 if (!rdev->ops->get_mesh_params) {
2714 err = -EOPNOTSUPP;
2715 goto out;
2716 }
2717 2522
2718 /* Get the mesh params */ 2523 /* Get the mesh params */
2719 err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params); 2524 err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params);
2720 if (err) 2525 if (err)
2721 goto out; 2526 return err;
2722 2527
2723 /* Draw up a netlink message to send back */ 2528 /* Draw up a netlink message to send back */
2724 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2529 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2725 if (!msg) { 2530 if (!msg)
2726 err = -ENOBUFS; 2531 return -ENOMEM;
2727 goto out;
2728 }
2729 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 2532 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2730 NL80211_CMD_GET_MESH_PARAMS); 2533 NL80211_CMD_GET_MESH_PARAMS);
2731 if (!hdr) 2534 if (!hdr)
@@ -2764,21 +2567,12 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
2764 cur_params.dot11MeshHWMPRootMode); 2567 cur_params.dot11MeshHWMPRootMode);
2765 nla_nest_end(msg, pinfoattr); 2568 nla_nest_end(msg, pinfoattr);
2766 genlmsg_end(msg, hdr); 2569 genlmsg_end(msg, hdr);
2767 err = genlmsg_reply(msg, info); 2570 return genlmsg_reply(msg, info);
2768 goto out;
2769 2571
2770 nla_put_failure: 2572 nla_put_failure:
2771 genlmsg_cancel(msg, hdr); 2573 genlmsg_cancel(msg, hdr);
2772 nlmsg_free(msg); 2574 nlmsg_free(msg);
2773 err = -EMSGSIZE; 2575 return -ENOBUFS;
2774 out:
2775 /* Cleanup */
2776 cfg80211_unlock_rdev(rdev);
2777 dev_put(dev);
2778 out_rtnl:
2779 rtnl_unlock();
2780
2781 return err;
2782} 2576}
2783 2577
2784#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ 2578#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \
@@ -2808,10 +2602,9 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
2808 2602
2809static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) 2603static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2810{ 2604{
2811 int err;
2812 u32 mask; 2605 u32 mask;
2813 struct cfg80211_registered_device *rdev; 2606 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2814 struct net_device *dev; 2607 struct net_device *dev = info->user_ptr[1];
2815 struct mesh_config cfg; 2608 struct mesh_config cfg;
2816 struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; 2609 struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
2817 struct nlattr *parent_attr; 2610 struct nlattr *parent_attr;
@@ -2823,16 +2616,8 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2823 parent_attr, nl80211_meshconf_params_policy)) 2616 parent_attr, nl80211_meshconf_params_policy))
2824 return -EINVAL; 2617 return -EINVAL;
2825 2618
2826 rtnl_lock(); 2619 if (!rdev->ops->set_mesh_params)
2827 2620 return -EOPNOTSUPP;
2828 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2829 if (err)
2830 goto out_rtnl;
2831
2832 if (!rdev->ops->set_mesh_params) {
2833 err = -EOPNOTSUPP;
2834 goto out;
2835 }
2836 2621
2837 /* This makes sure that there aren't more than 32 mesh config 2622 /* This makes sure that there aren't more than 32 mesh config
2838 * parameters (otherwise our bitfield scheme would not work.) */ 2623 * parameters (otherwise our bitfield scheme would not work.) */
@@ -2878,16 +2663,7 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2878 nla_get_u8); 2663 nla_get_u8);
2879 2664
2880 /* Apply changes */ 2665 /* Apply changes */
2881 err = rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask); 2666 return rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask);
2882
2883 out:
2884 /* cleanup */
2885 cfg80211_unlock_rdev(rdev);
2886 dev_put(dev);
2887 out_rtnl:
2888 rtnl_unlock();
2889
2890 return err;
2891} 2667}
2892 2668
2893#undef FILL_IN_MESH_PARAM_IF_SET 2669#undef FILL_IN_MESH_PARAM_IF_SET
@@ -3070,8 +2846,8 @@ static int validate_scan_freqs(struct nlattr *freqs)
3070 2846
3071static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) 2847static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3072{ 2848{
3073 struct cfg80211_registered_device *rdev; 2849 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3074 struct net_device *dev; 2850 struct net_device *dev = info->user_ptr[1];
3075 struct cfg80211_scan_request *request; 2851 struct cfg80211_scan_request *request;
3076 struct cfg80211_ssid *ssid; 2852 struct cfg80211_ssid *ssid;
3077 struct ieee80211_channel *channel; 2853 struct ieee80211_channel *channel;
@@ -3084,36 +2860,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3084 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) 2860 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
3085 return -EINVAL; 2861 return -EINVAL;
3086 2862
3087 rtnl_lock();
3088
3089 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3090 if (err)
3091 goto out_rtnl;
3092
3093 wiphy = &rdev->wiphy; 2863 wiphy = &rdev->wiphy;
3094 2864
3095 if (!rdev->ops->scan) { 2865 if (!rdev->ops->scan)
3096 err = -EOPNOTSUPP; 2866 return -EOPNOTSUPP;
3097 goto out;
3098 }
3099
3100 if (!netif_running(dev)) {
3101 err = -ENETDOWN;
3102 goto out;
3103 }
3104 2867
3105 if (rdev->scan_req) { 2868 if (rdev->scan_req)
3106 err = -EBUSY; 2869 return -EBUSY;
3107 goto out;
3108 }
3109 2870
3110 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { 2871 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
3111 n_channels = validate_scan_freqs( 2872 n_channels = validate_scan_freqs(
3112 info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); 2873 info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
3113 if (!n_channels) { 2874 if (!n_channels)
3114 err = -EINVAL; 2875 return -EINVAL;
3115 goto out;
3116 }
3117 } else { 2876 } else {
3118 n_channels = 0; 2877 n_channels = 0;
3119 2878
@@ -3126,29 +2885,23 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3126 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) 2885 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp)
3127 n_ssids++; 2886 n_ssids++;
3128 2887
3129 if (n_ssids > wiphy->max_scan_ssids) { 2888 if (n_ssids > wiphy->max_scan_ssids)
3130 err = -EINVAL; 2889 return -EINVAL;
3131 goto out;
3132 }
3133 2890
3134 if (info->attrs[NL80211_ATTR_IE]) 2891 if (info->attrs[NL80211_ATTR_IE])
3135 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); 2892 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
3136 else 2893 else
3137 ie_len = 0; 2894 ie_len = 0;
3138 2895
3139 if (ie_len > wiphy->max_scan_ie_len) { 2896 if (ie_len > wiphy->max_scan_ie_len)
3140 err = -EINVAL; 2897 return -EINVAL;
3141 goto out;
3142 }
3143 2898
3144 request = kzalloc(sizeof(*request) 2899 request = kzalloc(sizeof(*request)
3145 + sizeof(*ssid) * n_ssids 2900 + sizeof(*ssid) * n_ssids
3146 + sizeof(channel) * n_channels 2901 + sizeof(channel) * n_channels
3147 + ie_len, GFP_KERNEL); 2902 + ie_len, GFP_KERNEL);
3148 if (!request) { 2903 if (!request)
3149 err = -ENOMEM; 2904 return -ENOMEM;
3150 goto out;
3151 }
3152 2905
3153 if (n_ssids) 2906 if (n_ssids)
3154 request->ssids = (void *)&request->channels[n_channels]; 2907 request->ssids = (void *)&request->channels[n_channels];
@@ -3236,18 +2989,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3236 if (!err) { 2989 if (!err) {
3237 nl80211_send_scan_start(rdev, dev); 2990 nl80211_send_scan_start(rdev, dev);
3238 dev_hold(dev); 2991 dev_hold(dev);
3239 } 2992 } else {
3240
3241 out_free: 2993 out_free:
3242 if (err) {
3243 rdev->scan_req = NULL; 2994 rdev->scan_req = NULL;
3244 kfree(request); 2995 kfree(request);
3245 } 2996 }
3246 out:
3247 cfg80211_unlock_rdev(rdev);
3248 dev_put(dev);
3249 out_rtnl:
3250 rtnl_unlock();
3251 2997
3252 return err; 2998 return err;
3253} 2999}
@@ -3306,6 +3052,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
3306 } 3052 }
3307 3053
3308 switch (wdev->iftype) { 3054 switch (wdev->iftype) {
3055 case NL80211_IFTYPE_P2P_CLIENT:
3309 case NL80211_IFTYPE_STATION: 3056 case NL80211_IFTYPE_STATION:
3310 if (intbss == wdev->current_bss) 3057 if (intbss == wdev->current_bss)
3311 NLA_PUT_U32(msg, NL80211_BSS_STATUS, 3058 NLA_PUT_U32(msg, NL80211_BSS_STATUS,
@@ -3343,25 +3090,12 @@ static int nl80211_dump_scan(struct sk_buff *skb,
3343 struct net_device *dev; 3090 struct net_device *dev;
3344 struct cfg80211_internal_bss *scan; 3091 struct cfg80211_internal_bss *scan;
3345 struct wireless_dev *wdev; 3092 struct wireless_dev *wdev;
3346 int ifidx = cb->args[0];
3347 int start = cb->args[1], idx = 0; 3093 int start = cb->args[1], idx = 0;
3348 int err; 3094 int err;
3349 3095
3350 if (!ifidx) 3096 err = nl80211_prepare_netdev_dump(skb, cb, &rdev, &dev);
3351 ifidx = nl80211_get_ifidx(cb); 3097 if (err)
3352 if (ifidx < 0) 3098 return err;
3353 return ifidx;
3354 cb->args[0] = ifidx;
3355
3356 dev = dev_get_by_index(sock_net(skb->sk), ifidx);
3357 if (!dev)
3358 return -ENODEV;
3359
3360 rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
3361 if (IS_ERR(rdev)) {
3362 err = PTR_ERR(rdev);
3363 goto out_put_netdev;
3364 }
3365 3099
3366 wdev = dev->ieee80211_ptr; 3100 wdev = dev->ieee80211_ptr;
3367 3101
@@ -3377,21 +3111,17 @@ static int nl80211_dump_scan(struct sk_buff *skb,
3377 cb->nlh->nlmsg_seq, NLM_F_MULTI, 3111 cb->nlh->nlmsg_seq, NLM_F_MULTI,
3378 rdev, wdev, scan) < 0) { 3112 rdev, wdev, scan) < 0) {
3379 idx--; 3113 idx--;
3380 goto out; 3114 break;
3381 } 3115 }
3382 } 3116 }
3383 3117
3384 out:
3385 spin_unlock_bh(&rdev->bss_lock); 3118 spin_unlock_bh(&rdev->bss_lock);
3386 wdev_unlock(wdev); 3119 wdev_unlock(wdev);
3387 3120
3388 cb->args[1] = idx; 3121 cb->args[1] = idx;
3389 err = skb->len; 3122 nl80211_finish_netdev_dump(rdev);
3390 cfg80211_unlock_rdev(rdev);
3391 out_put_netdev:
3392 dev_put(dev);
3393 3123
3394 return err; 3124 return skb->len;
3395} 3125}
3396 3126
3397static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, 3127static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq,
@@ -3421,6 +3151,23 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq,
3421 if (survey->filled & SURVEY_INFO_NOISE_DBM) 3151 if (survey->filled & SURVEY_INFO_NOISE_DBM)
3422 NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE, 3152 NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE,
3423 survey->noise); 3153 survey->noise);
3154 if (survey->filled & SURVEY_INFO_IN_USE)
3155 NLA_PUT_FLAG(msg, NL80211_SURVEY_INFO_IN_USE);
3156 if (survey->filled & SURVEY_INFO_CHANNEL_TIME)
3157 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME,
3158 survey->channel_time);
3159 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY)
3160 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY,
3161 survey->channel_time_busy);
3162 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY)
3163 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY,
3164 survey->channel_time_ext_busy);
3165 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_RX)
3166 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX,
3167 survey->channel_time_rx);
3168 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_TX)
3169 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX,
3170 survey->channel_time_tx);
3424 3171
3425 nla_nest_end(msg, infoattr); 3172 nla_nest_end(msg, infoattr);
3426 3173
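[Editor's note: as with the station stats, the new SURVEY_INFO_* attributes above are emitted only when the driver sets the matching filled bits (SURVEY_INFO_IN_USE is a flag and carries no value). A hypothetical dump_survey() fill, with made-up counter sources:

	/* Sketch: a driver reporting the new channel-time counters;
	 * the ms_* values are illustrative, from assumed hardware counters.
	 */
	static void example_fill_survey(struct survey_info *survey,
					u64 ms_on_channel, u64 ms_busy, u64 ms_tx)
	{
		survey->filled |= SURVEY_INFO_IN_USE |
				  SURVEY_INFO_CHANNEL_TIME |
				  SURVEY_INFO_CHANNEL_TIME_BUSY |
				  SURVEY_INFO_CHANNEL_TIME_TX;
		survey->channel_time = ms_on_channel;	/* time on channel (ms) */
		survey->channel_time_busy = ms_busy;	/* CCA-busy time (ms) */
		survey->channel_time_tx = ms_tx;	/* time spent transmitting */
	}
]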
@@ -3437,29 +3184,12 @@ static int nl80211_dump_survey(struct sk_buff *skb,
3437 struct survey_info survey; 3184 struct survey_info survey;
3438 struct cfg80211_registered_device *dev; 3185 struct cfg80211_registered_device *dev;
3439 struct net_device *netdev; 3186 struct net_device *netdev;
3440 int ifidx = cb->args[0];
3441 int survey_idx = cb->args[1]; 3187 int survey_idx = cb->args[1];
3442 int res; 3188 int res;
3443 3189
3444 if (!ifidx) 3190 res = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
3445 ifidx = nl80211_get_ifidx(cb); 3191 if (res)
3446 if (ifidx < 0) 3192 return res;
3447 return ifidx;
3448 cb->args[0] = ifidx;
3449
3450 rtnl_lock();
3451
3452 netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
3453 if (!netdev) {
3454 res = -ENODEV;
3455 goto out_rtnl;
3456 }
3457
3458 dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
3459 if (IS_ERR(dev)) {
3460 res = PTR_ERR(dev);
3461 goto out_rtnl;
3462 }
3463 3193
3464 if (!dev->ops->dump_survey) { 3194 if (!dev->ops->dump_survey) {
3465 res = -EOPNOTSUPP; 3195 res = -EOPNOTSUPP;
@@ -3487,10 +3217,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
3487 cb->args[1] = survey_idx; 3217 cb->args[1] = survey_idx;
3488 res = skb->len; 3218 res = skb->len;
3489 out_err: 3219 out_err:
3490 cfg80211_unlock_rdev(dev); 3220 nl80211_finish_netdev_dump(dev);
3491 out_rtnl:
3492 rtnl_unlock();
3493
3494 return res; 3221 return res;
3495} 3222}
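
dump_survey above trades its open-coded ifindex lookup, rtnl locking, and unwind labels for the shared nl80211_prepare_netdev_dump()/nl80211_finish_netdev_dump() pair. The contract this relies on, as far as these hunks show: prepare either fully succeeds or leaves nothing held, and every exit after a successful prepare goes through finish. A toy model of that pairing, all names invented:

#include <stdio.h>

struct dump_ctx { int held; };

static int prepare(struct dump_ctx *c)
{
	c->held = 1;      /* a real prepare can fail; then nothing is held */
	return 0;
}

static void finish(struct dump_ctx *c)
{
	c->held = 0;      /* the single release point */
}

static int do_dump(void)
{
	struct dump_ctx c;
	int err = prepare(&c);

	if (err)
		return err;   /* nothing to undo */
	printf("dump body runs with resources held\n");
	finish(&c);
	return 0;
}

int main(void) { return do_dump(); }
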
3496 3223
@@ -3523,8 +3250,8 @@ static bool nl80211_valid_cipher_suite(u32 cipher)
3523 3250
3524static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) 3251static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3525{ 3252{
3526 struct cfg80211_registered_device *rdev; 3253 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3527 struct net_device *dev; 3254 struct net_device *dev = info->user_ptr[1];
3528 struct ieee80211_channel *chan; 3255 struct ieee80211_channel *chan;
3529 const u8 *bssid, *ssid, *ie = NULL; 3256 const u8 *bssid, *ssid, *ie = NULL;
3530 int err, ssid_len, ie_len = 0; 3257 int err, ssid_len, ie_len = 0;
@@ -3552,6 +3279,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3552 return err; 3279 return err;
3553 3280
3554 if (key.idx >= 0) { 3281 if (key.idx >= 0) {
3282 if (key.type != -1 && key.type != NL80211_KEYTYPE_GROUP)
3283 return -EINVAL;
3555 if (!key.p.key || !key.p.key_len) 3284 if (!key.p.key || !key.p.key_len)
3556 return -EINVAL; 3285 return -EINVAL;
3557 if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 || 3286 if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 ||
@@ -3566,34 +3295,31 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3566 key.p.key = NULL; 3295 key.p.key = NULL;
3567 } 3296 }
3568 3297
3569 rtnl_lock(); 3298 if (key.idx >= 0) {
3570 3299 int i;
3571 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 3300 bool ok = false;
3572 if (err) 3301 for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
3573 goto unlock_rtnl; 3302 if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
3574 3303 ok = true;
3575 if (!rdev->ops->auth) { 3304 break;
3576 err = -EOPNOTSUPP; 3305 }
3577 goto out; 3306 }
3307 if (!ok)
3308 return -EINVAL;
3578 } 3309 }
3579 3310
3580 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3311 if (!rdev->ops->auth)
3581 err = -EOPNOTSUPP; 3312 return -EOPNOTSUPP;
3582 goto out;
3583 }
3584 3313
3585 if (!netif_running(dev)) { 3314 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3586 err = -ENETDOWN; 3315 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3587 goto out; 3316 return -EOPNOTSUPP;
3588 }
3589 3317
3590 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3318 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3591 chan = ieee80211_get_channel(&rdev->wiphy, 3319 chan = ieee80211_get_channel(&rdev->wiphy,
3592 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); 3320 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
3593 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED)) { 3321 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED))
3594 err = -EINVAL; 3322 return -EINVAL;
3595 goto out;
3596 }
3597 3323
3598 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); 3324 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
3599 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); 3325 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
@@ -3604,27 +3330,19 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3604 } 3330 }
3605 3331
3606 auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); 3332 auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
3607 if (!nl80211_valid_auth_type(auth_type)) { 3333 if (!nl80211_valid_auth_type(auth_type))
3608 err = -EINVAL; 3334 return -EINVAL;
3609 goto out;
3610 }
3611 3335
3612 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 3336 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3613 3337
3614 err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, 3338 return cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
3615 ssid, ssid_len, ie, ie_len, 3339 ssid, ssid_len, ie, ie_len,
3616 key.p.key, key.p.key_len, key.idx, 3340 key.p.key, key.p.key_len, key.idx,
3617 local_state_change); 3341 local_state_change);
3618
3619 out:
3620 cfg80211_unlock_rdev(rdev);
3621 dev_put(dev);
3622 unlock_rtnl:
3623 rtnl_unlock();
3624 return err;
3625} 3342}
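
The block added to nl80211_authenticate validates the requested key cipher against the wiphy's advertised cipher_suites array. That whitelist loop in isolation; the WLAN_CIPHER_SUITE_* selectors are the real IEEE values, everything around them is simplified:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WLAN_CIPHER_SUITE_WEP40 0x000FAC01
#define WLAN_CIPHER_SUITE_CCMP  0x000FAC04

static bool cipher_supported(uint32_t cipher, const uint32_t *suites, int n)
{
	for (int i = 0; i < n; i++)
		if (cipher == suites[i])
			return true;
	return false;        /* caller maps this to -EINVAL */
}

int main(void)
{
	const uint32_t suites[] = { WLAN_CIPHER_SUITE_WEP40, WLAN_CIPHER_SUITE_CCMP };

	printf("%d\n", cipher_supported(WLAN_CIPHER_SUITE_CCMP, suites, 2)); /* 1 */
	printf("%d\n", cipher_supported(0x000FAC99, suites, 2));             /* 0 */
	return 0;
}
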
3626 3343
3627static int nl80211_crypto_settings(struct genl_info *info, 3344static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
3345 struct genl_info *info,
3628 struct cfg80211_crypto_settings *settings, 3346 struct cfg80211_crypto_settings *settings,
3629 int cipher_limit) 3347 int cipher_limit)
3630{ 3348{
@@ -3632,6 +3350,19 @@ static int nl80211_crypto_settings(struct genl_info *info,
3632 3350
3633 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; 3351 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];
3634 3352
3353 if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
3354 u16 proto;
3355 proto = nla_get_u16(
3356 info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
3357 settings->control_port_ethertype = cpu_to_be16(proto);
3358 if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
3359 proto != ETH_P_PAE)
3360 return -EINVAL;
3361 if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT])
3362 settings->control_port_no_encrypt = true;
3363 } else
3364 settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);
3365
3635 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { 3366 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
3636 void *data; 3367 void *data;
3637 int len, i; 3368 int len, i;
@@ -3691,8 +3422,8 @@ static int nl80211_crypto_settings(struct genl_info *info,
3691 3422
3692static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) 3423static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3693{ 3424{
3694 struct cfg80211_registered_device *rdev; 3425 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3695 struct net_device *dev; 3426 struct net_device *dev = info->user_ptr[1];
3696 struct cfg80211_crypto_settings crypto; 3427 struct cfg80211_crypto_settings crypto;
3697 struct ieee80211_channel *chan; 3428 struct ieee80211_channel *chan;
3698 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; 3429 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
@@ -3707,35 +3438,19 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3707 !info->attrs[NL80211_ATTR_WIPHY_FREQ]) 3438 !info->attrs[NL80211_ATTR_WIPHY_FREQ])
3708 return -EINVAL; 3439 return -EINVAL;
3709 3440
3710 rtnl_lock(); 3441 if (!rdev->ops->assoc)
3711 3442 return -EOPNOTSUPP;
3712 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3713 if (err)
3714 goto unlock_rtnl;
3715
3716 if (!rdev->ops->assoc) {
3717 err = -EOPNOTSUPP;
3718 goto out;
3719 }
3720
3721 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
3722 err = -EOPNOTSUPP;
3723 goto out;
3724 }
3725 3443
3726 if (!netif_running(dev)) { 3444 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3727 err = -ENETDOWN; 3445 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3728 goto out; 3446 return -EOPNOTSUPP;
3729 }
3730 3447
3731 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3448 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3732 3449
3733 chan = ieee80211_get_channel(&rdev->wiphy, 3450 chan = ieee80211_get_channel(&rdev->wiphy,
3734 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); 3451 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
3735 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED)) { 3452 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED))
3736 err = -EINVAL; 3453 return -EINVAL;
3737 goto out;
3738 }
3739 3454
3740 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); 3455 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
3741 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); 3456 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
@@ -3750,35 +3465,28 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3750 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); 3465 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
3751 if (mfp == NL80211_MFP_REQUIRED) 3466 if (mfp == NL80211_MFP_REQUIRED)
3752 use_mfp = true; 3467 use_mfp = true;
3753 else if (mfp != NL80211_MFP_NO) { 3468 else if (mfp != NL80211_MFP_NO)
3754 err = -EINVAL; 3469 return -EINVAL;
3755 goto out;
3756 }
3757 } 3470 }
3758 3471
3759 if (info->attrs[NL80211_ATTR_PREV_BSSID]) 3472 if (info->attrs[NL80211_ATTR_PREV_BSSID])
3760 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); 3473 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
3761 3474
3762 err = nl80211_crypto_settings(info, &crypto, 1); 3475 err = nl80211_crypto_settings(rdev, info, &crypto, 1);
3763 if (!err) 3476 if (!err)
3764 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 3477 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
3765 ssid, ssid_len, ie, ie_len, use_mfp, 3478 ssid, ssid_len, ie, ie_len, use_mfp,
3766 &crypto); 3479 &crypto);
3767 3480
3768 out:
3769 cfg80211_unlock_rdev(rdev);
3770 dev_put(dev);
3771 unlock_rtnl:
3772 rtnl_unlock();
3773 return err; 3481 return err;
3774} 3482}
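
The nl80211_crypto_settings hunk further up now resolves the control-port ethertype: a userspace-supplied NL80211_ATTR_CONTROL_PORT_ETHERTYPE is accepted only when the wiphy sets WIPHY_FLAG_CONTROL_PORT_PROTOCOL or the value is the EAPOL ethertype, and the default remains ETH_P_PAE. A compact model of that decision, with htons() standing in for cpu_to_be16():

#include <arpa/inet.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ETH_P_PAE 0x888E   /* EAPOL, as in <linux/if_ether.h> */

static int pick_ethertype(bool attr_present, uint16_t proto,
			  bool wiphy_allows_any, uint16_t *out_be)
{
	if (!attr_present) {
		*out_be = htons(ETH_P_PAE);   /* default */
		return 0;
	}
	if (!wiphy_allows_any && proto != ETH_P_PAE)
		return -22;                   /* -EINVAL */
	*out_be = htons(proto);
	return 0;
}

int main(void)
{
	uint16_t be = 0;

	printf("%d\n", pick_ethertype(true, 0x88B4, false, &be)); /* rejected */
	printf("%d\n", pick_ethertype(false, 0, false, &be));     /* default  */
	return 0;
}
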
3775 3483
3776static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) 3484static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3777{ 3485{
3778 struct cfg80211_registered_device *rdev; 3486 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3779 struct net_device *dev; 3487 struct net_device *dev = info->user_ptr[1];
3780 const u8 *ie = NULL, *bssid; 3488 const u8 *ie = NULL, *bssid;
3781 int err, ie_len = 0; 3489 int ie_len = 0;
3782 u16 reason_code; 3490 u16 reason_code;
3783 bool local_state_change; 3491 bool local_state_change;
3784 3492
@@ -3791,34 +3499,19 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3791 if (!info->attrs[NL80211_ATTR_REASON_CODE]) 3499 if (!info->attrs[NL80211_ATTR_REASON_CODE])
3792 return -EINVAL; 3500 return -EINVAL;
3793 3501
3794 rtnl_lock(); 3502 if (!rdev->ops->deauth)
3795 3503 return -EOPNOTSUPP;
3796 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3797 if (err)
3798 goto unlock_rtnl;
3799
3800 if (!rdev->ops->deauth) {
3801 err = -EOPNOTSUPP;
3802 goto out;
3803 }
3804
3805 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
3806 err = -EOPNOTSUPP;
3807 goto out;
3808 }
3809 3504
3810 if (!netif_running(dev)) { 3505 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3811 err = -ENETDOWN; 3506 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3812 goto out; 3507 return -EOPNOTSUPP;
3813 }
3814 3508
3815 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3509 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3816 3510
3817 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); 3511 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
3818 if (reason_code == 0) { 3512 if (reason_code == 0) {
3819 /* Reason Code 0 is reserved */ 3513 /* Reason Code 0 is reserved */
3820 err = -EINVAL; 3514 return -EINVAL;
3821 goto out;
3822 } 3515 }
3823 3516
3824 if (info->attrs[NL80211_ATTR_IE]) { 3517 if (info->attrs[NL80211_ATTR_IE]) {
@@ -3828,23 +3521,16 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3828 3521
3829 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 3522 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3830 3523
3831 err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, 3524 return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
3832 local_state_change); 3525 local_state_change);
3833
3834 out:
3835 cfg80211_unlock_rdev(rdev);
3836 dev_put(dev);
3837 unlock_rtnl:
3838 rtnl_unlock();
3839 return err;
3840} 3526}
3841 3527
3842static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) 3528static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3843{ 3529{
3844 struct cfg80211_registered_device *rdev; 3530 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3845 struct net_device *dev; 3531 struct net_device *dev = info->user_ptr[1];
3846 const u8 *ie = NULL, *bssid; 3532 const u8 *ie = NULL, *bssid;
3847 int err, ie_len = 0; 3533 int ie_len = 0;
3848 u16 reason_code; 3534 u16 reason_code;
3849 bool local_state_change; 3535 bool local_state_change;
3850 3536
@@ -3857,34 +3543,19 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3857 if (!info->attrs[NL80211_ATTR_REASON_CODE]) 3543 if (!info->attrs[NL80211_ATTR_REASON_CODE])
3858 return -EINVAL; 3544 return -EINVAL;
3859 3545
3860 rtnl_lock(); 3546 if (!rdev->ops->disassoc)
3861 3547 return -EOPNOTSUPP;
3862 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3863 if (err)
3864 goto unlock_rtnl;
3865
3866 if (!rdev->ops->disassoc) {
3867 err = -EOPNOTSUPP;
3868 goto out;
3869 }
3870
3871 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
3872 err = -EOPNOTSUPP;
3873 goto out;
3874 }
3875 3548
3876 if (!netif_running(dev)) { 3549 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3877 err = -ENETDOWN; 3550 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3878 goto out; 3551 return -EOPNOTSUPP;
3879 }
3880 3552
3881 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3553 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3882 3554
3883 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); 3555 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
3884 if (reason_code == 0) { 3556 if (reason_code == 0) {
3885 /* Reason Code 0 is reserved */ 3557 /* Reason Code 0 is reserved */
3886 err = -EINVAL; 3558 return -EINVAL;
3887 goto out;
3888 } 3559 }
3889 3560
3890 if (info->attrs[NL80211_ATTR_IE]) { 3561 if (info->attrs[NL80211_ATTR_IE]) {
@@ -3894,21 +3565,14 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3894 3565
3895 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 3566 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3896 3567
3897 err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, 3568 return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
3898 local_state_change); 3569 local_state_change);
3899
3900 out:
3901 cfg80211_unlock_rdev(rdev);
3902 dev_put(dev);
3903 unlock_rtnl:
3904 rtnl_unlock();
3905 return err;
3906} 3570}
3907 3571
3908static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) 3572static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3909{ 3573{
3910 struct cfg80211_registered_device *rdev; 3574 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3911 struct net_device *dev; 3575 struct net_device *dev = info->user_ptr[1];
3912 struct cfg80211_ibss_params ibss; 3576 struct cfg80211_ibss_params ibss;
3913 struct wiphy *wiphy; 3577 struct wiphy *wiphy;
3914 struct cfg80211_cached_keys *connkeys = NULL; 3578 struct cfg80211_cached_keys *connkeys = NULL;
@@ -3933,26 +3597,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3933 return -EINVAL; 3597 return -EINVAL;
3934 } 3598 }
3935 3599
3936 rtnl_lock(); 3600 if (!rdev->ops->join_ibss)
3937 3601 return -EOPNOTSUPP;
3938 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3939 if (err)
3940 goto unlock_rtnl;
3941
3942 if (!rdev->ops->join_ibss) {
3943 err = -EOPNOTSUPP;
3944 goto out;
3945 }
3946
3947 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
3948 err = -EOPNOTSUPP;
3949 goto out;
3950 }
3951 3602
3952 if (!netif_running(dev)) { 3603 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
3953 err = -ENETDOWN; 3604 return -EOPNOTSUPP;
3954 goto out;
3955 }
3956 3605
3957 wiphy = &rdev->wiphy; 3606 wiphy = &rdev->wiphy;
3958 3607
@@ -3970,24 +3619,12 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3970 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); 3619 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
3971 if (!ibss.channel || 3620 if (!ibss.channel ||
3972 ibss.channel->flags & IEEE80211_CHAN_NO_IBSS || 3621 ibss.channel->flags & IEEE80211_CHAN_NO_IBSS ||
3973 ibss.channel->flags & IEEE80211_CHAN_DISABLED) { 3622 ibss.channel->flags & IEEE80211_CHAN_DISABLED)
3974 err = -EINVAL; 3623 return -EINVAL;
3975 goto out;
3976 }
3977 3624
3978 ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; 3625 ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED];
3979 ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; 3626 ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
3980 3627
3981 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
3982 connkeys = nl80211_parse_connkeys(rdev,
3983 info->attrs[NL80211_ATTR_KEYS]);
3984 if (IS_ERR(connkeys)) {
3985 err = PTR_ERR(connkeys);
3986 connkeys = NULL;
3987 goto out;
3988 }
3989 }
3990
3991 if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { 3628 if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
3992 u8 *rates = 3629 u8 *rates =
3993 nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); 3630 nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
@@ -3997,10 +3634,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3997 wiphy->bands[ibss.channel->band]; 3634 wiphy->bands[ibss.channel->band];
3998 int i, j; 3635 int i, j;
3999 3636
4000 if (n_rates == 0) { 3637 if (n_rates == 0)
4001 err = -EINVAL; 3638 return -EINVAL;
4002 goto out;
4003 }
4004 3639
4005 for (i = 0; i < n_rates; i++) { 3640 for (i = 0; i < n_rates; i++) {
4006 int rate = (rates[i] & 0x7f) * 5; 3641 int rate = (rates[i] & 0x7f) * 5;
@@ -4013,77 +3648,36 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
4013 break; 3648 break;
4014 } 3649 }
4015 } 3650 }
4016 if (!found) { 3651 if (!found)
4017 err = -EINVAL; 3652 return -EINVAL;
4018 goto out;
4019 }
4020 }
4021 } else {
4022 /*
4023 * If no rates were explicitly configured,
4024 * use the mandatory rate set for 11b or
4025 * 11a for maximum compatibility.
4026 */
4027 struct ieee80211_supported_band *sband =
4028 wiphy->bands[ibss.channel->band];
4029 int j;
4030 u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ?
4031 IEEE80211_RATE_MANDATORY_A :
4032 IEEE80211_RATE_MANDATORY_B;
4033
4034 for (j = 0; j < sband->n_bitrates; j++) {
4035 if (sband->bitrates[j].flags & flag)
4036 ibss.basic_rates |= BIT(j);
4037 } 3653 }
4038 } 3654 }
4039 3655
4040 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); 3656 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
3657 connkeys = nl80211_parse_connkeys(rdev,
3658 info->attrs[NL80211_ATTR_KEYS]);
3659 if (IS_ERR(connkeys))
3660 return PTR_ERR(connkeys);
3661 }
4041 3662
4042 out: 3663 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
4043 cfg80211_unlock_rdev(rdev);
4044 dev_put(dev);
4045 unlock_rtnl:
4046 if (err) 3664 if (err)
4047 kfree(connkeys); 3665 kfree(connkeys);
4048 rtnl_unlock();
4049 return err; 3666 return err;
4050} 3667}
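
Worth noting in the join_ibss rework: the connkeys parse moves below every check that can fail, so after the allocation the only remaining failure is cfg80211_join_ibss() itself, and the error path frees exactly what the callee did not take ownership of. The allocate-late / free-on-error shape in a standalone sketch (toy types, errno values spelled as literals):

#include <stdlib.h>

struct keys { int dummy; };

static int consume(struct keys *k)
{
	(void)k;            /* on success the callee owns k */
	return 0;           /* or a negative errno */
}

static int join(int want_keys)
{
	struct keys *k = NULL;
	int err;

	/* ...all validation that needs no allocation happens first... */

	if (want_keys) {
		k = malloc(sizeof(*k));   /* last step before the consumer */
		if (!k)
			return -12;       /* -ENOMEM */
	}

	err = consume(k);
	if (err)
		free(k);                  /* free(NULL) is a safe no-op */
	return err;
}

int main(void) { return join(1); }
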
4051 3668
4052static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info) 3669static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info)
4053{ 3670{
4054 struct cfg80211_registered_device *rdev; 3671 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4055 struct net_device *dev; 3672 struct net_device *dev = info->user_ptr[1];
4056 int err;
4057
4058 rtnl_lock();
4059
4060 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4061 if (err)
4062 goto unlock_rtnl;
4063
4064 if (!rdev->ops->leave_ibss) {
4065 err = -EOPNOTSUPP;
4066 goto out;
4067 }
4068
4069 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
4070 err = -EOPNOTSUPP;
4071 goto out;
4072 }
4073 3673
4074 if (!netif_running(dev)) { 3674 if (!rdev->ops->leave_ibss)
4075 err = -ENETDOWN; 3675 return -EOPNOTSUPP;
4076 goto out;
4077 }
4078 3676
4079 err = cfg80211_leave_ibss(rdev, dev, false); 3677 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
3678 return -EOPNOTSUPP;
4080 3679
4081 out: 3680 return cfg80211_leave_ibss(rdev, dev, false);
4082 cfg80211_unlock_rdev(rdev);
4083 dev_put(dev);
4084 unlock_rtnl:
4085 rtnl_unlock();
4086 return err;
4087} 3681}
4088 3682
4089#ifdef CONFIG_NL80211_TESTMODE 3683#ifdef CONFIG_NL80211_TESTMODE
@@ -4093,20 +3687,12 @@ static struct genl_multicast_group nl80211_testmode_mcgrp = {
4093 3687
4094static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) 3688static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
4095{ 3689{
4096 struct cfg80211_registered_device *rdev; 3690 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4097 int err; 3691 int err;
4098 3692
4099 if (!info->attrs[NL80211_ATTR_TESTDATA]) 3693 if (!info->attrs[NL80211_ATTR_TESTDATA])
4100 return -EINVAL; 3694 return -EINVAL;
4101 3695
4102 rtnl_lock();
4103
4104 rdev = cfg80211_get_dev_from_info(info);
4105 if (IS_ERR(rdev)) {
4106 err = PTR_ERR(rdev);
4107 goto unlock_rtnl;
4108 }
4109
4110 err = -EOPNOTSUPP; 3696 err = -EOPNOTSUPP;
4111 if (rdev->ops->testmode_cmd) { 3697 if (rdev->ops->testmode_cmd) {
4112 rdev->testmode_info = info; 3698 rdev->testmode_info = info;
@@ -4116,10 +3702,6 @@ static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
4116 rdev->testmode_info = NULL; 3702 rdev->testmode_info = NULL;
4117 } 3703 }
4118 3704
4119 cfg80211_unlock_rdev(rdev);
4120
4121 unlock_rtnl:
4122 rtnl_unlock();
4123 return err; 3705 return err;
4124} 3706}
4125 3707
@@ -4210,8 +3792,8 @@ EXPORT_SYMBOL(cfg80211_testmode_event);
4210 3792
4211static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) 3793static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4212{ 3794{
4213 struct cfg80211_registered_device *rdev; 3795 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4214 struct net_device *dev; 3796 struct net_device *dev = info->user_ptr[1];
4215 struct cfg80211_connect_params connect; 3797 struct cfg80211_connect_params connect;
4216 struct wiphy *wiphy; 3798 struct wiphy *wiphy;
4217 struct cfg80211_cached_keys *connkeys = NULL; 3799 struct cfg80211_cached_keys *connkeys = NULL;
@@ -4236,25 +3818,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4236 3818
4237 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; 3819 connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];
4238 3820
4239 err = nl80211_crypto_settings(info, &connect.crypto, 3821 err = nl80211_crypto_settings(rdev, info, &connect.crypto,
4240 NL80211_MAX_NR_CIPHER_SUITES); 3822 NL80211_MAX_NR_CIPHER_SUITES);
4241 if (err) 3823 if (err)
4242 return err; 3824 return err;
4243 rtnl_lock();
4244 3825
4245 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 3826 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4246 if (err) 3827 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
4247 goto unlock_rtnl; 3828 return -EOPNOTSUPP;
4248
4249 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
4250 err = -EOPNOTSUPP;
4251 goto out;
4252 }
4253
4254 if (!netif_running(dev)) {
4255 err = -ENETDOWN;
4256 goto out;
4257 }
4258 3829
4259 wiphy = &rdev->wiphy; 3830 wiphy = &rdev->wiphy;
4260 3831
@@ -4273,39 +3844,27 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
4273 ieee80211_get_channel(wiphy, 3844 ieee80211_get_channel(wiphy,
4274 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); 3845 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
4275 if (!connect.channel || 3846 if (!connect.channel ||
4276 connect.channel->flags & IEEE80211_CHAN_DISABLED) { 3847 connect.channel->flags & IEEE80211_CHAN_DISABLED)
4277 err = -EINVAL; 3848 return -EINVAL;
4278 goto out;
4279 }
4280 } 3849 }
4281 3850
4282 if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) { 3851 if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
4283 connkeys = nl80211_parse_connkeys(rdev, 3852 connkeys = nl80211_parse_connkeys(rdev,
4284 info->attrs[NL80211_ATTR_KEYS]); 3853 info->attrs[NL80211_ATTR_KEYS]);
4285 if (IS_ERR(connkeys)) { 3854 if (IS_ERR(connkeys))
4286 err = PTR_ERR(connkeys); 3855 return PTR_ERR(connkeys);
4287 connkeys = NULL;
4288 goto out;
4289 }
4290 } 3856 }
4291 3857
4292 err = cfg80211_connect(rdev, dev, &connect, connkeys); 3858 err = cfg80211_connect(rdev, dev, &connect, connkeys);
4293
4294 out:
4295 cfg80211_unlock_rdev(rdev);
4296 dev_put(dev);
4297 unlock_rtnl:
4298 if (err) 3859 if (err)
4299 kfree(connkeys); 3860 kfree(connkeys);
4300 rtnl_unlock();
4301 return err; 3861 return err;
4302} 3862}
4303 3863
4304static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) 3864static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
4305{ 3865{
4306 struct cfg80211_registered_device *rdev; 3866 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4307 struct net_device *dev; 3867 struct net_device *dev = info->user_ptr[1];
4308 int err;
4309 u16 reason; 3868 u16 reason;
4310 3869
4311 if (!info->attrs[NL80211_ATTR_REASON_CODE]) 3870 if (!info->attrs[NL80211_ATTR_REASON_CODE])
@@ -4316,35 +3875,16 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
4316 if (reason == 0) 3875 if (reason == 0)
4317 return -EINVAL; 3876 return -EINVAL;
4318 3877
4319 rtnl_lock(); 3878 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4320 3879 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
4321 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 3880 return -EOPNOTSUPP;
4322 if (err)
4323 goto unlock_rtnl;
4324
4325 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
4326 err = -EOPNOTSUPP;
4327 goto out;
4328 }
4329
4330 if (!netif_running(dev)) {
4331 err = -ENETDOWN;
4332 goto out;
4333 }
4334
4335 err = cfg80211_disconnect(rdev, dev, reason, true);
4336 3881
4337 out: 3882 return cfg80211_disconnect(rdev, dev, reason, true);
4338 cfg80211_unlock_rdev(rdev);
4339 dev_put(dev);
4340 unlock_rtnl:
4341 rtnl_unlock();
4342 return err;
4343} 3883}
4344 3884
4345static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) 3885static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
4346{ 3886{
4347 struct cfg80211_registered_device *rdev; 3887 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4348 struct net *net; 3888 struct net *net;
4349 int err; 3889 int err;
4350 u32 pid; 3890 u32 pid;
@@ -4354,43 +3894,26 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
4354 3894
4355 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); 3895 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]);
4356 3896
4357 rtnl_lock();
4358
4359 rdev = cfg80211_get_dev_from_info(info);
4360 if (IS_ERR(rdev)) {
4361 err = PTR_ERR(rdev);
4362 goto out_rtnl;
4363 }
4364
4365 net = get_net_ns_by_pid(pid); 3897 net = get_net_ns_by_pid(pid);
4366 if (IS_ERR(net)) { 3898 if (IS_ERR(net))
4367 err = PTR_ERR(net); 3899 return PTR_ERR(net);
4368 goto out;
4369 }
4370 3900
4371 err = 0; 3901 err = 0;
4372 3902
4373 /* check if anything to do */ 3903 /* check if anything to do */
4374 if (net_eq(wiphy_net(&rdev->wiphy), net)) 3904 if (!net_eq(wiphy_net(&rdev->wiphy), net))
4375 goto out_put_net; 3905 err = cfg80211_switch_netns(rdev, net);
4376 3906
4377 err = cfg80211_switch_netns(rdev, net);
4378 out_put_net:
4379 put_net(net); 3907 put_net(net);
4380 out:
4381 cfg80211_unlock_rdev(rdev);
4382 out_rtnl:
4383 rtnl_unlock();
4384 return err; 3908 return err;
4385} 3909}
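
The reworked wiphy_netns tail checks net_eq() first and performs a single put_net() on both the switch and no-op paths. A toy compare-then-commit model; the namespace type and refcounting here are stand-ins, not the kernel structures:

#include <stdio.h>

struct ns { int id; int refs; };

static int do_switch(struct ns *cur, const struct ns *target)
{
	cur->id = target->id;   /* cfg80211_switch_netns() would go here */
	return 0;
}

static int set_netns(struct ns *cur, struct ns *target)
{
	int err = 0;

	target->refs++;                 /* like get_net_ns_by_pid() */
	if (cur->id != target->id)      /* like !net_eq(): skip no-ops */
		err = do_switch(cur, target);
	target->refs--;                 /* like put_net(), on both paths */
	return err;
}

int main(void)
{
	struct ns cur = { 1, 1 }, target = { 2, 1 };

	printf("err=%d new_id=%d\n", set_netns(&cur, &target), cur.id);
	return 0;
}
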
4386 3910
4387static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) 3911static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
4388{ 3912{
4389 struct cfg80211_registered_device *rdev; 3913 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4390 int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev, 3914 int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev,
4391 struct cfg80211_pmksa *pmksa) = NULL; 3915 struct cfg80211_pmksa *pmksa) = NULL;
4392 int err; 3916 struct net_device *dev = info->user_ptr[1];
4393 struct net_device *dev;
4394 struct cfg80211_pmksa pmksa; 3917 struct cfg80211_pmksa pmksa;
4395 3918
4396 memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); 3919 memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
@@ -4401,19 +3924,12 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
4401 if (!info->attrs[NL80211_ATTR_PMKID]) 3924 if (!info->attrs[NL80211_ATTR_PMKID])
4402 return -EINVAL; 3925 return -EINVAL;
4403 3926
4404 rtnl_lock();
4405
4406 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4407 if (err)
4408 goto out_rtnl;
4409
4410 pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); 3927 pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
4411 pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3928 pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
4412 3929
4413 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3930 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4414 err = -EOPNOTSUPP; 3931 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
4415 goto out; 3932 return -EOPNOTSUPP;
4416 }
4417 3933
4418 switch (info->genlhdr->cmd) { 3934 switch (info->genlhdr->cmd) {
4419 case NL80211_CMD_SET_PMKSA: 3935 case NL80211_CMD_SET_PMKSA:
@@ -4427,61 +3943,32 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
4427 break; 3943 break;
4428 } 3944 }
4429 3945
4430 if (!rdev_ops) { 3946 if (!rdev_ops)
4431 err = -EOPNOTSUPP; 3947 return -EOPNOTSUPP;
4432 goto out;
4433 }
4434
4435 err = rdev_ops(&rdev->wiphy, dev, &pmksa);
4436
4437 out:
4438 cfg80211_unlock_rdev(rdev);
4439 dev_put(dev);
4440 out_rtnl:
4441 rtnl_unlock();
4442 3948
4443 return err; 3949 return rdev_ops(&rdev->wiphy, dev, &pmksa);
4444} 3950}
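
setdel_pmksa keeps its structure of selecting an op by genlhdr->cmd, refusing a NULL op with -EOPNOTSUPP, and making one call at the end; only the unwind labels disappear. The dispatch in miniature, with invented command and op names:

#include <stdio.h>

enum cmd { CMD_SET_PMKSA, CMD_DEL_PMKSA };

struct pmksa { const char *bssid; };

static int set_op(struct pmksa *p)
{
	printf("set %s\n", p->bssid);
	return 0;
}

static int setdel(enum cmd cmd, struct pmksa *p)
{
	int (*op)(struct pmksa *) = NULL;

	switch (cmd) {
	case CMD_SET_PMKSA:
		op = set_op;
		break;
	case CMD_DEL_PMKSA:
		op = NULL;        /* this toy driver left del unimplemented */
		break;
	}
	if (!op)
		return -95;       /* -EOPNOTSUPP */
	return op(p);
}

int main(void)
{
	struct pmksa p = { "00:11:22:33:44:55" };

	return setdel(CMD_SET_PMKSA, &p);
}
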
4445 3951
4446static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) 3952static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
4447{ 3953{
4448 struct cfg80211_registered_device *rdev; 3954 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4449 int err; 3955 struct net_device *dev = info->user_ptr[1];
4450 struct net_device *dev;
4451
4452 rtnl_lock();
4453
4454 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4455 if (err)
4456 goto out_rtnl;
4457
4458 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
4459 err = -EOPNOTSUPP;
4460 goto out;
4461 }
4462
4463 if (!rdev->ops->flush_pmksa) {
4464 err = -EOPNOTSUPP;
4465 goto out;
4466 }
4467 3956
4468 err = rdev->ops->flush_pmksa(&rdev->wiphy, dev); 3957 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4469 3958 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
4470 out: 3959 return -EOPNOTSUPP;
4471 cfg80211_unlock_rdev(rdev);
4472 dev_put(dev);
4473 out_rtnl:
4474 rtnl_unlock();
4475 3960
4476 return err; 3961 if (!rdev->ops->flush_pmksa)
3962 return -EOPNOTSUPP;
4477 3963
3964 return rdev->ops->flush_pmksa(&rdev->wiphy, dev);
4478} 3965}
4479 3966
4480static int nl80211_remain_on_channel(struct sk_buff *skb, 3967static int nl80211_remain_on_channel(struct sk_buff *skb,
4481 struct genl_info *info) 3968 struct genl_info *info)
4482{ 3969{
4483 struct cfg80211_registered_device *rdev; 3970 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4484 struct net_device *dev; 3971 struct net_device *dev = info->user_ptr[1];
4485 struct ieee80211_channel *chan; 3972 struct ieee80211_channel *chan;
4486 struct sk_buff *msg; 3973 struct sk_buff *msg;
4487 void *hdr; 3974 void *hdr;
@@ -4503,21 +3990,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
4503 if (!duration || !msecs_to_jiffies(duration) || duration > 5000) 3990 if (!duration || !msecs_to_jiffies(duration) || duration > 5000)
4504 return -EINVAL; 3991 return -EINVAL;
4505 3992
4506 rtnl_lock(); 3993 if (!rdev->ops->remain_on_channel)
4507 3994 return -EOPNOTSUPP;
4508 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4509 if (err)
4510 goto unlock_rtnl;
4511
4512 if (!rdev->ops->remain_on_channel) {
4513 err = -EOPNOTSUPP;
4514 goto out;
4515 }
4516
4517 if (!netif_running(dev)) {
4518 err = -ENETDOWN;
4519 goto out;
4520 }
4521 3995
4522 if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { 3996 if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
4523 channel_type = nla_get_u32( 3997 channel_type = nla_get_u32(
@@ -4525,24 +3999,18 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
4525 if (channel_type != NL80211_CHAN_NO_HT && 3999 if (channel_type != NL80211_CHAN_NO_HT &&
4526 channel_type != NL80211_CHAN_HT20 && 4000 channel_type != NL80211_CHAN_HT20 &&
4527 channel_type != NL80211_CHAN_HT40PLUS && 4001 channel_type != NL80211_CHAN_HT40PLUS &&
4528 channel_type != NL80211_CHAN_HT40MINUS) { 4002 channel_type != NL80211_CHAN_HT40MINUS)
4529 err = -EINVAL; 4003 return -EINVAL;
4530 goto out;
4531 }
4532 } 4004 }
4533 4005
4534 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); 4006 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
4535 chan = rdev_freq_to_chan(rdev, freq, channel_type); 4007 chan = rdev_freq_to_chan(rdev, freq, channel_type);
4536 if (chan == NULL) { 4008 if (chan == NULL)
4537 err = -EINVAL; 4009 return -EINVAL;
4538 goto out;
4539 }
4540 4010
4541 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 4011 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
4542 if (!msg) { 4012 if (!msg)
4543 err = -ENOMEM; 4013 return -ENOMEM;
4544 goto out;
4545 }
4546 4014
4547 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 4015 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
4548 NL80211_CMD_REMAIN_ON_CHANNEL); 4016 NL80211_CMD_REMAIN_ON_CHANNEL);
@@ -4561,58 +4029,32 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
4561 NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); 4029 NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
4562 4030
4563 genlmsg_end(msg, hdr); 4031 genlmsg_end(msg, hdr);
4564 err = genlmsg_reply(msg, info); 4032
4565 goto out; 4033 return genlmsg_reply(msg, info);
4566 4034
4567 nla_put_failure: 4035 nla_put_failure:
4568 err = -ENOBUFS; 4036 err = -ENOBUFS;
4569 free_msg: 4037 free_msg:
4570 nlmsg_free(msg); 4038 nlmsg_free(msg);
4571 out:
4572 cfg80211_unlock_rdev(rdev);
4573 dev_put(dev);
4574 unlock_rtnl:
4575 rtnl_unlock();
4576 return err; 4039 return err;
4577} 4040}
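
remain_on_channel still builds its reply in the usual netlink shape: allocate the message, attempt the puts, and route any put failure through a single free label. A userspace model of that goto-based cleanup; the message type is a toy and nlmsg_new()/genlmsg_reply() are only mimicked:

#include <stdlib.h>

struct msg { char *buf; };

static int put_u64(struct msg *m, unsigned long long v)
{
	(void)m; (void)v;
	return 0;   /* 0 on success, nonzero when out of room */
}

static int reply_with_cookie(unsigned long long cookie)
{
	struct msg *m = malloc(sizeof(*m));
	int err;

	if (!m)
		return -12;              /* -ENOMEM */

	if (put_u64(m, cookie)) {        /* like NLA_PUT_U64 jumping to */
		err = -105;              /* nla_put_failure: -ENOBUFS   */
		goto free_msg;
	}

	free(m);                         /* genlmsg_reply() consumes the msg */
	return 0;

free_msg:
	free(m);                         /* the one cleanup label */
	return err;
}

int main(void) { return reply_with_cookie(0x1234); }
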
4578 4041
4579static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, 4042static int nl80211_cancel_remain_on_channel(struct sk_buff *skb,
4580 struct genl_info *info) 4043 struct genl_info *info)
4581{ 4044{
4582 struct cfg80211_registered_device *rdev; 4045 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4583 struct net_device *dev; 4046 struct net_device *dev = info->user_ptr[1];
4584 u64 cookie; 4047 u64 cookie;
4585 int err;
4586 4048
4587 if (!info->attrs[NL80211_ATTR_COOKIE]) 4049 if (!info->attrs[NL80211_ATTR_COOKIE])
4588 return -EINVAL; 4050 return -EINVAL;
4589 4051
4590 rtnl_lock(); 4052 if (!rdev->ops->cancel_remain_on_channel)
4591 4053 return -EOPNOTSUPP;
4592 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4593 if (err)
4594 goto unlock_rtnl;
4595
4596 if (!rdev->ops->cancel_remain_on_channel) {
4597 err = -EOPNOTSUPP;
4598 goto out;
4599 }
4600
4601 if (!netif_running(dev)) {
4602 err = -ENETDOWN;
4603 goto out;
4604 }
4605 4054
4606 cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); 4055 cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
4607 4056
4608 err = rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie); 4057 return rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie);
4609
4610 out:
4611 cfg80211_unlock_rdev(rdev);
4612 dev_put(dev);
4613 unlock_rtnl:
4614 rtnl_unlock();
4615 return err;
4616} 4058}
4617 4059
4618static u32 rateset_to_mask(struct ieee80211_supported_band *sband, 4060static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
@@ -4648,26 +4090,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
4648 struct genl_info *info) 4090 struct genl_info *info)
4649{ 4091{
4650 struct nlattr *tb[NL80211_TXRATE_MAX + 1]; 4092 struct nlattr *tb[NL80211_TXRATE_MAX + 1];
4651 struct cfg80211_registered_device *rdev; 4093 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4652 struct cfg80211_bitrate_mask mask; 4094 struct cfg80211_bitrate_mask mask;
4653 int err, rem, i; 4095 int rem, i;
4654 struct net_device *dev; 4096 struct net_device *dev = info->user_ptr[1];
4655 struct nlattr *tx_rates; 4097 struct nlattr *tx_rates;
4656 struct ieee80211_supported_band *sband; 4098 struct ieee80211_supported_band *sband;
4657 4099
4658 if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) 4100 if (info->attrs[NL80211_ATTR_TX_RATES] == NULL)
4659 return -EINVAL; 4101 return -EINVAL;
4660 4102
4661 rtnl_lock(); 4103 if (!rdev->ops->set_bitrate_mask)
4662 4104 return -EOPNOTSUPP;
4663 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4664 if (err)
4665 goto unlock_rtnl;
4666
4667 if (!rdev->ops->set_bitrate_mask) {
4668 err = -EOPNOTSUPP;
4669 goto unlock;
4670 }
4671 4105
4672 memset(&mask, 0, sizeof(mask)); 4106 memset(&mask, 0, sizeof(mask));
4673 /* Default to all rates enabled */ 4107 /* Default to all rates enabled */
@@ -4684,15 +4118,11 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
4684 nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) 4118 nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem)
4685 { 4119 {
4686 enum ieee80211_band band = nla_type(tx_rates); 4120 enum ieee80211_band band = nla_type(tx_rates);
4687 if (band < 0 || band >= IEEE80211_NUM_BANDS) { 4121 if (band < 0 || band >= IEEE80211_NUM_BANDS)
4688 err = -EINVAL; 4122 return -EINVAL;
4689 goto unlock;
4690 }
4691 sband = rdev->wiphy.bands[band]; 4123 sband = rdev->wiphy.bands[band];
4692 if (sband == NULL) { 4124 if (sband == NULL)
4693 err = -EINVAL; 4125 return -EINVAL;
4694 goto unlock;
4695 }
4696 nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), 4126 nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
4697 nla_len(tx_rates), nl80211_txattr_policy); 4127 nla_len(tx_rates), nl80211_txattr_policy);
4698 if (tb[NL80211_TXRATE_LEGACY]) { 4128 if (tb[NL80211_TXRATE_LEGACY]) {
@@ -4700,68 +4130,48 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
4700 sband, 4130 sband,
4701 nla_data(tb[NL80211_TXRATE_LEGACY]), 4131 nla_data(tb[NL80211_TXRATE_LEGACY]),
4702 nla_len(tb[NL80211_TXRATE_LEGACY])); 4132 nla_len(tb[NL80211_TXRATE_LEGACY]));
4703 if (mask.control[band].legacy == 0) { 4133 if (mask.control[band].legacy == 0)
4704 err = -EINVAL; 4134 return -EINVAL;
4705 goto unlock;
4706 }
4707 } 4135 }
4708 } 4136 }
4709 4137
4710 err = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask); 4138 return rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask);
4711
4712 unlock:
4713 dev_put(dev);
4714 cfg80211_unlock_rdev(rdev);
4715 unlock_rtnl:
4716 rtnl_unlock();
4717 return err;
4718} 4139}
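
The handler above leans on rateset_to_mask(): legacy rates arrive from userspace in 500 kbps units, and each must match a bitrate in the band's table or the mask comes back 0, which the caller turns into -EINVAL. A self-contained model of that conversion (band table in 100 kbps units, as in the kernel's bitrate arrays):

#include <stdint.h>
#include <stdio.h>

static uint32_t rateset_to_mask(const int *band, int n_band,
				const uint8_t *rates, int n_rates)
{
	uint32_t mask = 0;

	for (int i = 0; i < n_rates; i++) {
		int rate = (rates[i] & 0x7f) * 5;  /* 500 -> 100 kbps units */
		int found = 0;

		for (int j = 0; j < n_band; j++) {
			if (band[j] == rate) {
				mask |= 1u << j;
				found = 1;
				break;
			}
		}
		if (!found)
			return 0;   /* caller maps this to -EINVAL */
	}
	return mask;
}

int main(void)
{
	const int band[] = { 10, 20, 55, 110 };  /* 1, 2, 5.5, 11 Mbps */
	const uint8_t rates[] = { 2, 11 };       /* 1 and 5.5 Mbps */

	printf("mask=0x%x\n", rateset_to_mask(band, 4, rates, 2)); /* 0x5 */
	return 0;
}
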
4719 4140
4720static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) 4141static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
4721{ 4142{
4722 struct cfg80211_registered_device *rdev; 4143 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4723 struct net_device *dev; 4144 struct net_device *dev = info->user_ptr[1];
4724 int err; 4145 u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;
4725 4146
4726 if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) 4147 if (!info->attrs[NL80211_ATTR_FRAME_MATCH])
4727 return -EINVAL; 4148 return -EINVAL;
4728 4149
4729 if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) 4150 if (info->attrs[NL80211_ATTR_FRAME_TYPE])
4730 return -EINVAL; 4151 frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]);
4731
4732 rtnl_lock();
4733
4734 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4735 if (err)
4736 goto unlock_rtnl;
4737 4152
4738 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && 4153 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4739 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { 4154 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
4740 err = -EOPNOTSUPP; 4155 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
4741 goto out; 4156 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
4742 } 4157 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
4158 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
4159 return -EOPNOTSUPP;
4743 4160
4744 /* not much point in registering if we can't reply */ 4161 /* not much point in registering if we can't reply */
4745 if (!rdev->ops->action) { 4162 if (!rdev->ops->mgmt_tx)
4746 err = -EOPNOTSUPP; 4163 return -EOPNOTSUPP;
4747 goto out;
4748 }
4749 4164
4750 err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, 4165 return cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid,
4166 frame_type,
4751 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), 4167 nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
4752 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); 4168 nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
4753 out:
4754 cfg80211_unlock_rdev(rdev);
4755 dev_put(dev);
4756 unlock_rtnl:
4757 rtnl_unlock();
4758 return err;
4759} 4169}
4760 4170
4761static int nl80211_action(struct sk_buff *skb, struct genl_info *info) 4171static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
4762{ 4172{
4763 struct cfg80211_registered_device *rdev; 4173 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4764 struct net_device *dev; 4174 struct net_device *dev = info->user_ptr[1];
4765 struct ieee80211_channel *chan; 4175 struct ieee80211_channel *chan;
4766 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; 4176 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
4767 bool channel_type_valid = false; 4177 bool channel_type_valid = false;
@@ -4775,27 +4185,16 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4775 !info->attrs[NL80211_ATTR_WIPHY_FREQ]) 4185 !info->attrs[NL80211_ATTR_WIPHY_FREQ])
4776 return -EINVAL; 4186 return -EINVAL;
4777 4187
4778 rtnl_lock(); 4188 if (!rdev->ops->mgmt_tx)
4779 4189 return -EOPNOTSUPP;
4780 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4781 if (err)
4782 goto unlock_rtnl;
4783
4784 if (!rdev->ops->action) {
4785 err = -EOPNOTSUPP;
4786 goto out;
4787 }
4788 4190
4789 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && 4191 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4790 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { 4192 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
4791 err = -EOPNOTSUPP; 4193 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
4792 goto out; 4194 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
4793 } 4195 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
4794 4196 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
4795 if (!netif_running(dev)) { 4197 return -EOPNOTSUPP;
4796 err = -ENETDOWN;
4797 goto out;
4798 }
4799 4198
4800 if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { 4199 if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
4801 channel_type = nla_get_u32( 4200 channel_type = nla_get_u32(
@@ -4803,147 +4202,104 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4803 if (channel_type != NL80211_CHAN_NO_HT && 4202 if (channel_type != NL80211_CHAN_NO_HT &&
4804 channel_type != NL80211_CHAN_HT20 && 4203 channel_type != NL80211_CHAN_HT20 &&
4805 channel_type != NL80211_CHAN_HT40PLUS && 4204 channel_type != NL80211_CHAN_HT40PLUS &&
4806 channel_type != NL80211_CHAN_HT40MINUS) { 4205 channel_type != NL80211_CHAN_HT40MINUS)
4807 err = -EINVAL; 4206 return -EINVAL;
4808 goto out;
4809 }
4810 channel_type_valid = true; 4207 channel_type_valid = true;
4811 } 4208 }
4812 4209
4813 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); 4210 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
4814 chan = rdev_freq_to_chan(rdev, freq, channel_type); 4211 chan = rdev_freq_to_chan(rdev, freq, channel_type);
4815 if (chan == NULL) { 4212 if (chan == NULL)
4816 err = -EINVAL; 4213 return -EINVAL;
4817 goto out;
4818 }
4819 4214
4820 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 4215 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
4821 if (!msg) { 4216 if (!msg)
4822 err = -ENOMEM; 4217 return -ENOMEM;
4823 goto out;
4824 }
4825 4218
4826 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 4219 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
4827 NL80211_CMD_ACTION); 4220 NL80211_CMD_FRAME);
4828 4221
4829 if (IS_ERR(hdr)) { 4222 if (IS_ERR(hdr)) {
4830 err = PTR_ERR(hdr); 4223 err = PTR_ERR(hdr);
4831 goto free_msg; 4224 goto free_msg;
4832 } 4225 }
4833 err = cfg80211_mlme_action(rdev, dev, chan, channel_type, 4226 err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type,
4834 channel_type_valid, 4227 channel_type_valid,
4835 nla_data(info->attrs[NL80211_ATTR_FRAME]), 4228 nla_data(info->attrs[NL80211_ATTR_FRAME]),
4836 nla_len(info->attrs[NL80211_ATTR_FRAME]), 4229 nla_len(info->attrs[NL80211_ATTR_FRAME]),
4837 &cookie); 4230 &cookie);
4838 if (err) 4231 if (err)
4839 goto free_msg; 4232 goto free_msg;
4840 4233
4841 NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie); 4234 NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
4842 4235
4843 genlmsg_end(msg, hdr); 4236 genlmsg_end(msg, hdr);
4844 err = genlmsg_reply(msg, info); 4237 return genlmsg_reply(msg, info);
4845 goto out;
4846 4238
4847 nla_put_failure: 4239 nla_put_failure:
4848 err = -ENOBUFS; 4240 err = -ENOBUFS;
4849 free_msg: 4241 free_msg:
4850 nlmsg_free(msg); 4242 nlmsg_free(msg);
4851 out:
4852 cfg80211_unlock_rdev(rdev);
4853 dev_put(dev);
4854 unlock_rtnl:
4855 rtnl_unlock();
4856 return err; 4243 return err;
4857} 4244}
4858 4245
4859static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) 4246static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
4860{ 4247{
4861 struct cfg80211_registered_device *rdev; 4248 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4862 struct wireless_dev *wdev; 4249 struct wireless_dev *wdev;
4863 struct net_device *dev; 4250 struct net_device *dev = info->user_ptr[1];
4864 u8 ps_state; 4251 u8 ps_state;
4865 bool state; 4252 bool state;
4866 int err; 4253 int err;
4867 4254
4868 if (!info->attrs[NL80211_ATTR_PS_STATE]) { 4255 if (!info->attrs[NL80211_ATTR_PS_STATE])
4869 err = -EINVAL; 4256 return -EINVAL;
4870 goto out;
4871 }
4872 4257
4873 ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]); 4258 ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]);
4874 4259
4875 if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED) { 4260 if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED)
4876 err = -EINVAL; 4261 return -EINVAL;
4877 goto out;
4878 }
4879
4880 rtnl_lock();
4881
4882 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4883 if (err)
4884 goto unlock_rdev;
4885 4262
4886 wdev = dev->ieee80211_ptr; 4263 wdev = dev->ieee80211_ptr;
4887 4264
4888 if (!rdev->ops->set_power_mgmt) { 4265 if (!rdev->ops->set_power_mgmt)
4889 err = -EOPNOTSUPP; 4266 return -EOPNOTSUPP;
4890 goto unlock_rdev;
4891 }
4892 4267
4893 state = (ps_state == NL80211_PS_ENABLED) ? true : false; 4268 state = (ps_state == NL80211_PS_ENABLED) ? true : false;
4894 4269
4895 if (state == wdev->ps) 4270 if (state == wdev->ps)
4896 goto unlock_rdev; 4271 return 0;
4897
4898 wdev->ps = state;
4899
4900 if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, wdev->ps,
4901 wdev->ps_timeout))
4902 /* assume this means it's off */
4903 wdev->ps = false;
4904
4905 unlock_rdev:
4906 cfg80211_unlock_rdev(rdev);
4907 dev_put(dev);
4908 rtnl_unlock();
4909 4272
4910 out: 4273 err = rdev->ops->set_power_mgmt(wdev->wiphy, dev, state,
4274 wdev->ps_timeout);
4275 if (!err)
4276 wdev->ps = state;
4911 return err; 4277 return err;
4912} 4278}
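
Besides shedding boilerplate, this hunk fixes the update order: the old code wrote wdev->ps before calling the driver and guessed "off" on failure, while the new code calls the driver first and commits the cached state only on success. The pattern in isolation:

#include <stdbool.h>
#include <stdio.h>

struct wdev_state { bool ps; };

static int driver_set_ps(bool on)
{
	(void)on;
	return 0;   /* or a negative errno from the driver */
}

static int set_power_save(struct wdev_state *w, bool state)
{
	int err;

	if (state == w->ps)
		return 0;                /* nothing to do */

	err = driver_set_ps(state);      /* ask the driver first */
	if (!err)
		w->ps = state;           /* commit only on success */
	return err;
}

int main(void)
{
	struct wdev_state w = { false };

	printf("err=%d ps=%d\n", set_power_save(&w, true), w.ps);
	return 0;
}
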
4913 4279
4914static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) 4280static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
4915{ 4281{
4916 struct cfg80211_registered_device *rdev; 4282 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4917 enum nl80211_ps_state ps_state; 4283 enum nl80211_ps_state ps_state;
4918 struct wireless_dev *wdev; 4284 struct wireless_dev *wdev;
4919 struct net_device *dev; 4285 struct net_device *dev = info->user_ptr[1];
4920 struct sk_buff *msg; 4286 struct sk_buff *msg;
4921 void *hdr; 4287 void *hdr;
4922 int err; 4288 int err;
4923 4289
4924 rtnl_lock();
4925
4926 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4927 if (err)
4928 goto unlock_rtnl;
4929
4930 wdev = dev->ieee80211_ptr; 4290 wdev = dev->ieee80211_ptr;
4931 4291
4932 if (!rdev->ops->set_power_mgmt) { 4292 if (!rdev->ops->set_power_mgmt)
4933 err = -EOPNOTSUPP; 4293 return -EOPNOTSUPP;
4934 goto out;
4935 }
4936 4294
4937 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 4295 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
4938 if (!msg) { 4296 if (!msg)
4939 err = -ENOMEM; 4297 return -ENOMEM;
4940 goto out;
4941 }
4942 4298
4943 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 4299 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
4944 NL80211_CMD_GET_POWER_SAVE); 4300 NL80211_CMD_GET_POWER_SAVE);
4945 if (!hdr) { 4301 if (!hdr) {
4946 err = -ENOMEM; 4302 err = -ENOBUFS;
4947 goto free_msg; 4303 goto free_msg;
4948 } 4304 }
4949 4305
@@ -4955,22 +4311,12 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
4955 NLA_PUT_U32(msg, NL80211_ATTR_PS_STATE, ps_state); 4311 NLA_PUT_U32(msg, NL80211_ATTR_PS_STATE, ps_state);
4956 4312
4957 genlmsg_end(msg, hdr); 4313 genlmsg_end(msg, hdr);
4958 err = genlmsg_reply(msg, info); 4314 return genlmsg_reply(msg, info);
4959 goto out;
4960 4315
4961 nla_put_failure: 4316 nla_put_failure:
4962 err = -ENOBUFS; 4317 err = -ENOBUFS;
4963 4318 free_msg:
4964 free_msg:
4965 nlmsg_free(msg); 4319 nlmsg_free(msg);
4966
4967 out:
4968 cfg80211_unlock_rdev(rdev);
4969 dev_put(dev);
4970
4971 unlock_rtnl:
4972 rtnl_unlock();
4973
4974 return err; 4320 return err;
4975} 4321}
4976 4322
@@ -4984,41 +4330,24 @@ nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = {
4984static int nl80211_set_cqm_rssi(struct genl_info *info, 4330static int nl80211_set_cqm_rssi(struct genl_info *info,
4985 s32 threshold, u32 hysteresis) 4331 s32 threshold, u32 hysteresis)
4986{ 4332{
4987 struct cfg80211_registered_device *rdev; 4333 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4988 struct wireless_dev *wdev; 4334 struct wireless_dev *wdev;
4989 struct net_device *dev; 4335 struct net_device *dev = info->user_ptr[1];
4990 int err;
4991 4336
4992 if (threshold > 0) 4337 if (threshold > 0)
4993 return -EINVAL; 4338 return -EINVAL;
4994 4339
4995 rtnl_lock();
4996
4997 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4998 if (err)
4999 goto unlock_rdev;
5000
5001 wdev = dev->ieee80211_ptr; 4340 wdev = dev->ieee80211_ptr;
5002 4341
5003 if (!rdev->ops->set_cqm_rssi_config) { 4342 if (!rdev->ops->set_cqm_rssi_config)
5004 err = -EOPNOTSUPP; 4343 return -EOPNOTSUPP;
5005 goto unlock_rdev;
5006 }
5007
5008 if (wdev->iftype != NL80211_IFTYPE_STATION) {
5009 err = -EOPNOTSUPP;
5010 goto unlock_rdev;
5011 }
5012
5013 err = rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
5014 threshold, hysteresis);
5015 4344
5016 unlock_rdev: 4345 if (wdev->iftype != NL80211_IFTYPE_STATION &&
5017 cfg80211_unlock_rdev(rdev); 4346 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
5018 dev_put(dev); 4347 return -EOPNOTSUPP;
5019 rtnl_unlock();
5020 4348
5021 return err; 4349 return rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
4350 threshold, hysteresis);
5022} 4351}
5023 4352
5024static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) 4353static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
@@ -5052,6 +4381,65 @@ out:
5052 return err; 4381 return err;
5053} 4382}
5054 4383
4384#define NL80211_FLAG_NEED_WIPHY 0x01
4385#define NL80211_FLAG_NEED_NETDEV 0x02
4386#define NL80211_FLAG_NEED_RTNL 0x04
4387#define NL80211_FLAG_CHECK_NETDEV_UP 0x08
4388#define NL80211_FLAG_NEED_NETDEV_UP (NL80211_FLAG_NEED_NETDEV |\
4389 NL80211_FLAG_CHECK_NETDEV_UP)
4390
4391static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
4392 struct genl_info *info)
4393{
4394 struct cfg80211_registered_device *rdev;
4395 struct net_device *dev;
4396 int err;
4397 bool rtnl = ops->internal_flags & NL80211_FLAG_NEED_RTNL;
4398
4399 if (rtnl)
4400 rtnl_lock();
4401
4402 if (ops->internal_flags & NL80211_FLAG_NEED_WIPHY) {
4403 rdev = cfg80211_get_dev_from_info(info);
4404 if (IS_ERR(rdev)) {
4405 if (rtnl)
4406 rtnl_unlock();
4407 return PTR_ERR(rdev);
4408 }
4409 info->user_ptr[0] = rdev;
4410 } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
4411 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4412 if (err) {
4413 if (rtnl)
4414 rtnl_unlock();
4415 return err;
4416 }
4417 if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
4418 !netif_running(dev)) {
4419 cfg80211_unlock_rdev(rdev);
4420 dev_put(dev);
4421 if (rtnl)
4422 rtnl_unlock();
4423 return -ENETDOWN;
4424 }
4425 info->user_ptr[0] = rdev;
4426 info->user_ptr[1] = dev;
4427 }
4428
4429 return 0;
4430}
4431
4432static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb,
4433 struct genl_info *info)
4434{
4435 if (info->user_ptr[0])
4436 cfg80211_unlock_rdev(info->user_ptr[0]);
4437 if (info->user_ptr[1])
4438 dev_put(info->user_ptr[1]);
4439 if (ops->internal_flags & NL80211_FLAG_NEED_RTNL)
4440 rtnl_unlock();
4441}
4442
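
The pre_doit/post_doit pair above is the heart of the whole refactor: per-command internal_flags tell one hook what to look up and lock, the results are parked in info->user_ptr[], and the post hook is the single place that releases them, which is what lets every handler shrink to plain returns. A compact userspace model of that dispatch; the flag names echo the ones above, everything else is a toy:

#include <stdio.h>

#define FLAG_NEED_NETDEV 0x01
#define FLAG_NETDEV_UP   0x02

struct op { unsigned flags; int (*doit)(void *dev); };

static int pre_doit(const struct op *op, void **dev_out)
{
	static int dev_up = 1;   /* toy "device" that happens to be up */

	*dev_out = NULL;
	if (op->flags & FLAG_NEED_NETDEV) {
		if ((op->flags & FLAG_NETDEV_UP) && !dev_up)
			return -100;          /* -ENETDOWN */
		*dev_out = &dev_up;           /* like info->user_ptr[1] */
	}
	return 0;
}

static void post_doit(void *dev)
{
	(void)dev;   /* dev_put()/unlock would happen here */
}

static int dispatch(const struct op *op)
{
	void *dev;
	int err = pre_doit(op, &dev);

	if (err)
		return err;       /* handler never runs half-prepared */
	err = op->doit(dev);      /* handler sees a ready device */
	post_doit(dev);           /* single release point */
	return err;
}

static int handler(void *dev)
{
	printf("handler got dev=%p\n", dev);
	return 0;
}

int main(void)
{
	struct op op = { FLAG_NEED_NETDEV | FLAG_NETDEV_UP, handler };

	return dispatch(&op);
}
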
5055static struct genl_ops nl80211_ops[] = { 4443static struct genl_ops nl80211_ops[] = {
5056 { 4444 {
5057 .cmd = NL80211_CMD_GET_WIPHY, 4445 .cmd = NL80211_CMD_GET_WIPHY,
@@ -5059,12 +4447,14 @@ static struct genl_ops nl80211_ops[] = {
5059 .dumpit = nl80211_dump_wiphy, 4447 .dumpit = nl80211_dump_wiphy,
5060 .policy = nl80211_policy, 4448 .policy = nl80211_policy,
5061 /* can be retrieved by unprivileged users */ 4449 /* can be retrieved by unprivileged users */
4450 .internal_flags = NL80211_FLAG_NEED_WIPHY,
5062 }, 4451 },
5063 { 4452 {
5064 .cmd = NL80211_CMD_SET_WIPHY, 4453 .cmd = NL80211_CMD_SET_WIPHY,
5065 .doit = nl80211_set_wiphy, 4454 .doit = nl80211_set_wiphy,
5066 .policy = nl80211_policy, 4455 .policy = nl80211_policy,
5067 .flags = GENL_ADMIN_PERM, 4456 .flags = GENL_ADMIN_PERM,
4457 .internal_flags = NL80211_FLAG_NEED_RTNL,
5068 }, 4458 },
5069 { 4459 {
5070 .cmd = NL80211_CMD_GET_INTERFACE, 4460 .cmd = NL80211_CMD_GET_INTERFACE,
@@ -5072,90 +4462,119 @@ static struct genl_ops nl80211_ops[] = {
5072 .dumpit = nl80211_dump_interface, 4462 .dumpit = nl80211_dump_interface,
5073 .policy = nl80211_policy, 4463 .policy = nl80211_policy,
5074 /* can be retrieved by unprivileged users */ 4464 /* can be retrieved by unprivileged users */
4465 .internal_flags = NL80211_FLAG_NEED_NETDEV,
5075 }, 4466 },
5076 { 4467 {
5077 .cmd = NL80211_CMD_SET_INTERFACE, 4468 .cmd = NL80211_CMD_SET_INTERFACE,
5078 .doit = nl80211_set_interface, 4469 .doit = nl80211_set_interface,
5079 .policy = nl80211_policy, 4470 .policy = nl80211_policy,
5080 .flags = GENL_ADMIN_PERM, 4471 .flags = GENL_ADMIN_PERM,
4472 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4473 NL80211_FLAG_NEED_RTNL,
5081 }, 4474 },
5082 { 4475 {
5083 .cmd = NL80211_CMD_NEW_INTERFACE, 4476 .cmd = NL80211_CMD_NEW_INTERFACE,
5084 .doit = nl80211_new_interface, 4477 .doit = nl80211_new_interface,
5085 .policy = nl80211_policy, 4478 .policy = nl80211_policy,
5086 .flags = GENL_ADMIN_PERM, 4479 .flags = GENL_ADMIN_PERM,
4480 .internal_flags = NL80211_FLAG_NEED_WIPHY |
4481 NL80211_FLAG_NEED_RTNL,
5087 }, 4482 },
5088 { 4483 {
5089 .cmd = NL80211_CMD_DEL_INTERFACE, 4484 .cmd = NL80211_CMD_DEL_INTERFACE,
5090 .doit = nl80211_del_interface, 4485 .doit = nl80211_del_interface,
5091 .policy = nl80211_policy, 4486 .policy = nl80211_policy,
5092 .flags = GENL_ADMIN_PERM, 4487 .flags = GENL_ADMIN_PERM,
4488 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4489 NL80211_FLAG_NEED_RTNL,
5093 }, 4490 },
5094 { 4491 {
5095 .cmd = NL80211_CMD_GET_KEY, 4492 .cmd = NL80211_CMD_GET_KEY,
5096 .doit = nl80211_get_key, 4493 .doit = nl80211_get_key,
5097 .policy = nl80211_policy, 4494 .policy = nl80211_policy,
5098 .flags = GENL_ADMIN_PERM, 4495 .flags = GENL_ADMIN_PERM,
4496 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4497 NL80211_FLAG_NEED_RTNL,
5099 }, 4498 },
5100 { 4499 {
5101 .cmd = NL80211_CMD_SET_KEY, 4500 .cmd = NL80211_CMD_SET_KEY,
5102 .doit = nl80211_set_key, 4501 .doit = nl80211_set_key,
5103 .policy = nl80211_policy, 4502 .policy = nl80211_policy,
5104 .flags = GENL_ADMIN_PERM, 4503 .flags = GENL_ADMIN_PERM,
4504 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4505 NL80211_FLAG_NEED_RTNL,
5105 }, 4506 },
5106 { 4507 {
5107 .cmd = NL80211_CMD_NEW_KEY, 4508 .cmd = NL80211_CMD_NEW_KEY,
5108 .doit = nl80211_new_key, 4509 .doit = nl80211_new_key,
5109 .policy = nl80211_policy, 4510 .policy = nl80211_policy,
5110 .flags = GENL_ADMIN_PERM, 4511 .flags = GENL_ADMIN_PERM,
4512 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4513 NL80211_FLAG_NEED_RTNL,
5111 }, 4514 },
5112 { 4515 {
5113 .cmd = NL80211_CMD_DEL_KEY, 4516 .cmd = NL80211_CMD_DEL_KEY,
5114 .doit = nl80211_del_key, 4517 .doit = nl80211_del_key,
5115 .policy = nl80211_policy, 4518 .policy = nl80211_policy,
5116 .flags = GENL_ADMIN_PERM, 4519 .flags = GENL_ADMIN_PERM,
4520 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4521 NL80211_FLAG_NEED_RTNL,
5117 }, 4522 },
5118 { 4523 {
5119 .cmd = NL80211_CMD_SET_BEACON, 4524 .cmd = NL80211_CMD_SET_BEACON,
5120 .policy = nl80211_policy, 4525 .policy = nl80211_policy,
5121 .flags = GENL_ADMIN_PERM, 4526 .flags = GENL_ADMIN_PERM,
5122 .doit = nl80211_addset_beacon, 4527 .doit = nl80211_addset_beacon,
4528 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4529 NL80211_FLAG_NEED_RTNL,
5123 }, 4530 },
5124 { 4531 {
5125 .cmd = NL80211_CMD_NEW_BEACON, 4532 .cmd = NL80211_CMD_NEW_BEACON,
5126 .policy = nl80211_policy, 4533 .policy = nl80211_policy,
5127 .flags = GENL_ADMIN_PERM, 4534 .flags = GENL_ADMIN_PERM,
5128 .doit = nl80211_addset_beacon, 4535 .doit = nl80211_addset_beacon,
4536 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4537 NL80211_FLAG_NEED_RTNL,
5129 }, 4538 },
5130 { 4539 {
5131 .cmd = NL80211_CMD_DEL_BEACON, 4540 .cmd = NL80211_CMD_DEL_BEACON,
5132 .policy = nl80211_policy, 4541 .policy = nl80211_policy,
5133 .flags = GENL_ADMIN_PERM, 4542 .flags = GENL_ADMIN_PERM,
5134 .doit = nl80211_del_beacon, 4543 .doit = nl80211_del_beacon,
4544 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4545 NL80211_FLAG_NEED_RTNL,
5135 }, 4546 },
5136 { 4547 {
5137 .cmd = NL80211_CMD_GET_STATION, 4548 .cmd = NL80211_CMD_GET_STATION,
5138 .doit = nl80211_get_station, 4549 .doit = nl80211_get_station,
5139 .dumpit = nl80211_dump_station, 4550 .dumpit = nl80211_dump_station,
5140 .policy = nl80211_policy, 4551 .policy = nl80211_policy,
4552 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4553 NL80211_FLAG_NEED_RTNL,
5141 }, 4554 },
5142 { 4555 {
5143 .cmd = NL80211_CMD_SET_STATION, 4556 .cmd = NL80211_CMD_SET_STATION,
5144 .doit = nl80211_set_station, 4557 .doit = nl80211_set_station,
5145 .policy = nl80211_policy, 4558 .policy = nl80211_policy,
5146 .flags = GENL_ADMIN_PERM, 4559 .flags = GENL_ADMIN_PERM,
4560 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4561 NL80211_FLAG_NEED_RTNL,
5147 }, 4562 },
5148 { 4563 {
5149 .cmd = NL80211_CMD_NEW_STATION, 4564 .cmd = NL80211_CMD_NEW_STATION,
5150 .doit = nl80211_new_station, 4565 .doit = nl80211_new_station,
5151 .policy = nl80211_policy, 4566 .policy = nl80211_policy,
5152 .flags = GENL_ADMIN_PERM, 4567 .flags = GENL_ADMIN_PERM,
4568 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4569 NL80211_FLAG_NEED_RTNL,
5153 }, 4570 },
5154 { 4571 {
5155 .cmd = NL80211_CMD_DEL_STATION, 4572 .cmd = NL80211_CMD_DEL_STATION,
5156 .doit = nl80211_del_station, 4573 .doit = nl80211_del_station,
5157 .policy = nl80211_policy, 4574 .policy = nl80211_policy,
5158 .flags = GENL_ADMIN_PERM, 4575 .flags = GENL_ADMIN_PERM,
4576 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4577 NL80211_FLAG_NEED_RTNL,
5159 }, 4578 },
5160 { 4579 {
5161 .cmd = NL80211_CMD_GET_MPATH, 4580 .cmd = NL80211_CMD_GET_MPATH,
@@ -5163,30 +4582,40 @@ static struct genl_ops nl80211_ops[] = {
5163 .dumpit = nl80211_dump_mpath, 4582 .dumpit = nl80211_dump_mpath,
5164 .policy = nl80211_policy, 4583 .policy = nl80211_policy,
5165 .flags = GENL_ADMIN_PERM, 4584 .flags = GENL_ADMIN_PERM,
4585 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4586 NL80211_FLAG_NEED_RTNL,
5166 }, 4587 },
5167 { 4588 {
5168 .cmd = NL80211_CMD_SET_MPATH, 4589 .cmd = NL80211_CMD_SET_MPATH,
5169 .doit = nl80211_set_mpath, 4590 .doit = nl80211_set_mpath,
5170 .policy = nl80211_policy, 4591 .policy = nl80211_policy,
5171 .flags = GENL_ADMIN_PERM, 4592 .flags = GENL_ADMIN_PERM,
4593 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4594 NL80211_FLAG_NEED_RTNL,
5172 }, 4595 },
5173 { 4596 {
5174 .cmd = NL80211_CMD_NEW_MPATH, 4597 .cmd = NL80211_CMD_NEW_MPATH,
5175 .doit = nl80211_new_mpath, 4598 .doit = nl80211_new_mpath,
5176 .policy = nl80211_policy, 4599 .policy = nl80211_policy,
5177 .flags = GENL_ADMIN_PERM, 4600 .flags = GENL_ADMIN_PERM,
4601 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4602 NL80211_FLAG_NEED_RTNL,
5178 }, 4603 },
5179 { 4604 {
5180 .cmd = NL80211_CMD_DEL_MPATH, 4605 .cmd = NL80211_CMD_DEL_MPATH,
5181 .doit = nl80211_del_mpath, 4606 .doit = nl80211_del_mpath,
5182 .policy = nl80211_policy, 4607 .policy = nl80211_policy,
5183 .flags = GENL_ADMIN_PERM, 4608 .flags = GENL_ADMIN_PERM,
4609 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4610 NL80211_FLAG_NEED_RTNL,
5184 }, 4611 },
5185 { 4612 {
5186 .cmd = NL80211_CMD_SET_BSS, 4613 .cmd = NL80211_CMD_SET_BSS,
5187 .doit = nl80211_set_bss, 4614 .doit = nl80211_set_bss,
5188 .policy = nl80211_policy, 4615 .policy = nl80211_policy,
5189 .flags = GENL_ADMIN_PERM, 4616 .flags = GENL_ADMIN_PERM,
4617 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4618 NL80211_FLAG_NEED_RTNL,
5190 }, 4619 },
5191 { 4620 {
5192 .cmd = NL80211_CMD_GET_REG, 4621 .cmd = NL80211_CMD_GET_REG,
@@ -5211,18 +4640,24 @@ static struct genl_ops nl80211_ops[] = {
5211 .doit = nl80211_get_mesh_params, 4640 .doit = nl80211_get_mesh_params,
5212 .policy = nl80211_policy, 4641 .policy = nl80211_policy,
5213 /* can be retrieved by unprivileged users */ 4642 /* can be retrieved by unprivileged users */
4643 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4644 NL80211_FLAG_NEED_RTNL,
5214 }, 4645 },
5215 { 4646 {
5216 .cmd = NL80211_CMD_SET_MESH_PARAMS, 4647 .cmd = NL80211_CMD_SET_MESH_PARAMS,
5217 .doit = nl80211_set_mesh_params, 4648 .doit = nl80211_set_mesh_params,
5218 .policy = nl80211_policy, 4649 .policy = nl80211_policy,
5219 .flags = GENL_ADMIN_PERM, 4650 .flags = GENL_ADMIN_PERM,
4651 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4652 NL80211_FLAG_NEED_RTNL,
5220 }, 4653 },
5221 { 4654 {
5222 .cmd = NL80211_CMD_TRIGGER_SCAN, 4655 .cmd = NL80211_CMD_TRIGGER_SCAN,
5223 .doit = nl80211_trigger_scan, 4656 .doit = nl80211_trigger_scan,
5224 .policy = nl80211_policy, 4657 .policy = nl80211_policy,
5225 .flags = GENL_ADMIN_PERM, 4658 .flags = GENL_ADMIN_PERM,
4659 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4660 NL80211_FLAG_NEED_RTNL,
5226 }, 4661 },
5227 { 4662 {
5228 .cmd = NL80211_CMD_GET_SCAN, 4663 .cmd = NL80211_CMD_GET_SCAN,
@@ -5234,36 +4669,48 @@ static struct genl_ops nl80211_ops[] = {
5234 .doit = nl80211_authenticate, 4669 .doit = nl80211_authenticate,
5235 .policy = nl80211_policy, 4670 .policy = nl80211_policy,
5236 .flags = GENL_ADMIN_PERM, 4671 .flags = GENL_ADMIN_PERM,
4672 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4673 NL80211_FLAG_NEED_RTNL,
5237 }, 4674 },
5238 { 4675 {
5239 .cmd = NL80211_CMD_ASSOCIATE, 4676 .cmd = NL80211_CMD_ASSOCIATE,
5240 .doit = nl80211_associate, 4677 .doit = nl80211_associate,
5241 .policy = nl80211_policy, 4678 .policy = nl80211_policy,
5242 .flags = GENL_ADMIN_PERM, 4679 .flags = GENL_ADMIN_PERM,
4680 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4681 NL80211_FLAG_NEED_RTNL,
5243 }, 4682 },
5244 { 4683 {
5245 .cmd = NL80211_CMD_DEAUTHENTICATE, 4684 .cmd = NL80211_CMD_DEAUTHENTICATE,
5246 .doit = nl80211_deauthenticate, 4685 .doit = nl80211_deauthenticate,
5247 .policy = nl80211_policy, 4686 .policy = nl80211_policy,
5248 .flags = GENL_ADMIN_PERM, 4687 .flags = GENL_ADMIN_PERM,
4688 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4689 NL80211_FLAG_NEED_RTNL,
5249 }, 4690 },
5250 { 4691 {
5251 .cmd = NL80211_CMD_DISASSOCIATE, 4692 .cmd = NL80211_CMD_DISASSOCIATE,
5252 .doit = nl80211_disassociate, 4693 .doit = nl80211_disassociate,
5253 .policy = nl80211_policy, 4694 .policy = nl80211_policy,
5254 .flags = GENL_ADMIN_PERM, 4695 .flags = GENL_ADMIN_PERM,
4696 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4697 NL80211_FLAG_NEED_RTNL,
5255 }, 4698 },
5256 { 4699 {
5257 .cmd = NL80211_CMD_JOIN_IBSS, 4700 .cmd = NL80211_CMD_JOIN_IBSS,
5258 .doit = nl80211_join_ibss, 4701 .doit = nl80211_join_ibss,
5259 .policy = nl80211_policy, 4702 .policy = nl80211_policy,
5260 .flags = GENL_ADMIN_PERM, 4703 .flags = GENL_ADMIN_PERM,
4704 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4705 NL80211_FLAG_NEED_RTNL,
5261 }, 4706 },
5262 { 4707 {
5263 .cmd = NL80211_CMD_LEAVE_IBSS, 4708 .cmd = NL80211_CMD_LEAVE_IBSS,
5264 .doit = nl80211_leave_ibss, 4709 .doit = nl80211_leave_ibss,
5265 .policy = nl80211_policy, 4710 .policy = nl80211_policy,
5266 .flags = GENL_ADMIN_PERM, 4711 .flags = GENL_ADMIN_PERM,
4712 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4713 NL80211_FLAG_NEED_RTNL,
5267 }, 4714 },
5268#ifdef CONFIG_NL80211_TESTMODE 4715#ifdef CONFIG_NL80211_TESTMODE
5269 { 4716 {
@@ -5271,6 +4718,8 @@ static struct genl_ops nl80211_ops[] = {
5271 .doit = nl80211_testmode_do, 4718 .doit = nl80211_testmode_do,
5272 .policy = nl80211_policy, 4719 .policy = nl80211_policy,
5273 .flags = GENL_ADMIN_PERM, 4720 .flags = GENL_ADMIN_PERM,
4721 .internal_flags = NL80211_FLAG_NEED_WIPHY |
4722 NL80211_FLAG_NEED_RTNL,
5274 }, 4723 },
5275#endif 4724#endif
5276 { 4725 {
@@ -5278,18 +4727,24 @@ static struct genl_ops nl80211_ops[] = {
5278 .doit = nl80211_connect, 4727 .doit = nl80211_connect,
5279 .policy = nl80211_policy, 4728 .policy = nl80211_policy,
5280 .flags = GENL_ADMIN_PERM, 4729 .flags = GENL_ADMIN_PERM,
4730 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4731 NL80211_FLAG_NEED_RTNL,
5281 }, 4732 },
5282 { 4733 {
5283 .cmd = NL80211_CMD_DISCONNECT, 4734 .cmd = NL80211_CMD_DISCONNECT,
5284 .doit = nl80211_disconnect, 4735 .doit = nl80211_disconnect,
5285 .policy = nl80211_policy, 4736 .policy = nl80211_policy,
5286 .flags = GENL_ADMIN_PERM, 4737 .flags = GENL_ADMIN_PERM,
4738 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4739 NL80211_FLAG_NEED_RTNL,
5287 }, 4740 },
5288 { 4741 {
5289 .cmd = NL80211_CMD_SET_WIPHY_NETNS, 4742 .cmd = NL80211_CMD_SET_WIPHY_NETNS,
5290 .doit = nl80211_wiphy_netns, 4743 .doit = nl80211_wiphy_netns,
5291 .policy = nl80211_policy, 4744 .policy = nl80211_policy,
5292 .flags = GENL_ADMIN_PERM, 4745 .flags = GENL_ADMIN_PERM,
4746 .internal_flags = NL80211_FLAG_NEED_WIPHY |
4747 NL80211_FLAG_NEED_RTNL,
5293 }, 4748 },
5294 { 4749 {
5295 .cmd = NL80211_CMD_GET_SURVEY, 4750 .cmd = NL80211_CMD_GET_SURVEY,
@@ -5301,72 +4756,104 @@ static struct genl_ops nl80211_ops[] = {
5301 .doit = nl80211_setdel_pmksa, 4756 .doit = nl80211_setdel_pmksa,
5302 .policy = nl80211_policy, 4757 .policy = nl80211_policy,
5303 .flags = GENL_ADMIN_PERM, 4758 .flags = GENL_ADMIN_PERM,
4759 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4760 NL80211_FLAG_NEED_RTNL,
5304 }, 4761 },
5305 { 4762 {
5306 .cmd = NL80211_CMD_DEL_PMKSA, 4763 .cmd = NL80211_CMD_DEL_PMKSA,
5307 .doit = nl80211_setdel_pmksa, 4764 .doit = nl80211_setdel_pmksa,
5308 .policy = nl80211_policy, 4765 .policy = nl80211_policy,
5309 .flags = GENL_ADMIN_PERM, 4766 .flags = GENL_ADMIN_PERM,
4767 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4768 NL80211_FLAG_NEED_RTNL,
5310 }, 4769 },
5311 { 4770 {
5312 .cmd = NL80211_CMD_FLUSH_PMKSA, 4771 .cmd = NL80211_CMD_FLUSH_PMKSA,
5313 .doit = nl80211_flush_pmksa, 4772 .doit = nl80211_flush_pmksa,
5314 .policy = nl80211_policy, 4773 .policy = nl80211_policy,
5315 .flags = GENL_ADMIN_PERM, 4774 .flags = GENL_ADMIN_PERM,
4775 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4776 NL80211_FLAG_NEED_RTNL,
5316 }, 4777 },
5317 { 4778 {
5318 .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, 4779 .cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
5319 .doit = nl80211_remain_on_channel, 4780 .doit = nl80211_remain_on_channel,
5320 .policy = nl80211_policy, 4781 .policy = nl80211_policy,
5321 .flags = GENL_ADMIN_PERM, 4782 .flags = GENL_ADMIN_PERM,
4783 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4784 NL80211_FLAG_NEED_RTNL,
5322 }, 4785 },
5323 { 4786 {
5324 .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, 4787 .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
5325 .doit = nl80211_cancel_remain_on_channel, 4788 .doit = nl80211_cancel_remain_on_channel,
5326 .policy = nl80211_policy, 4789 .policy = nl80211_policy,
5327 .flags = GENL_ADMIN_PERM, 4790 .flags = GENL_ADMIN_PERM,
4791 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4792 NL80211_FLAG_NEED_RTNL,
5328 }, 4793 },
5329 { 4794 {
5330 .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, 4795 .cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
5331 .doit = nl80211_set_tx_bitrate_mask, 4796 .doit = nl80211_set_tx_bitrate_mask,
5332 .policy = nl80211_policy, 4797 .policy = nl80211_policy,
5333 .flags = GENL_ADMIN_PERM, 4798 .flags = GENL_ADMIN_PERM,
4799 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4800 NL80211_FLAG_NEED_RTNL,
5334 }, 4801 },
5335 { 4802 {
5336 .cmd = NL80211_CMD_REGISTER_ACTION, 4803 .cmd = NL80211_CMD_REGISTER_FRAME,
5337 .doit = nl80211_register_action, 4804 .doit = nl80211_register_mgmt,
5338 .policy = nl80211_policy, 4805 .policy = nl80211_policy,
5339 .flags = GENL_ADMIN_PERM, 4806 .flags = GENL_ADMIN_PERM,
4807 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4808 NL80211_FLAG_NEED_RTNL,
5340 }, 4809 },
5341 { 4810 {
5342 .cmd = NL80211_CMD_ACTION, 4811 .cmd = NL80211_CMD_FRAME,
5343 .doit = nl80211_action, 4812 .doit = nl80211_tx_mgmt,
5344 .policy = nl80211_policy, 4813 .policy = nl80211_policy,
5345 .flags = GENL_ADMIN_PERM, 4814 .flags = GENL_ADMIN_PERM,
4815 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
4816 NL80211_FLAG_NEED_RTNL,
5346 }, 4817 },
5347 { 4818 {
5348 .cmd = NL80211_CMD_SET_POWER_SAVE, 4819 .cmd = NL80211_CMD_SET_POWER_SAVE,
5349 .doit = nl80211_set_power_save, 4820 .doit = nl80211_set_power_save,
5350 .policy = nl80211_policy, 4821 .policy = nl80211_policy,
5351 .flags = GENL_ADMIN_PERM, 4822 .flags = GENL_ADMIN_PERM,
4823 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4824 NL80211_FLAG_NEED_RTNL,
5352 }, 4825 },
5353 { 4826 {
5354 .cmd = NL80211_CMD_GET_POWER_SAVE, 4827 .cmd = NL80211_CMD_GET_POWER_SAVE,
5355 .doit = nl80211_get_power_save, 4828 .doit = nl80211_get_power_save,
5356 .policy = nl80211_policy, 4829 .policy = nl80211_policy,
5357 /* can be retrieved by unprivileged users */ 4830 /* can be retrieved by unprivileged users */
4831 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4832 NL80211_FLAG_NEED_RTNL,
5358 }, 4833 },
5359 { 4834 {
5360 .cmd = NL80211_CMD_SET_CQM, 4835 .cmd = NL80211_CMD_SET_CQM,
5361 .doit = nl80211_set_cqm, 4836 .doit = nl80211_set_cqm,
5362 .policy = nl80211_policy, 4837 .policy = nl80211_policy,
5363 .flags = GENL_ADMIN_PERM, 4838 .flags = GENL_ADMIN_PERM,
4839 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4840 NL80211_FLAG_NEED_RTNL,
5364 }, 4841 },
5365 { 4842 {
5366 .cmd = NL80211_CMD_SET_CHANNEL, 4843 .cmd = NL80211_CMD_SET_CHANNEL,
5367 .doit = nl80211_set_channel, 4844 .doit = nl80211_set_channel,
5368 .policy = nl80211_policy, 4845 .policy = nl80211_policy,
5369 .flags = GENL_ADMIN_PERM, 4846 .flags = GENL_ADMIN_PERM,
4847 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4848 NL80211_FLAG_NEED_RTNL,
4849 },
4850 {
4851 .cmd = NL80211_CMD_SET_WDS_PEER,
4852 .doit = nl80211_set_wds_peer,
4853 .policy = nl80211_policy,
4854 .flags = GENL_ADMIN_PERM,
4855 .internal_flags = NL80211_FLAG_NEED_NETDEV |
4856 NL80211_FLAG_NEED_RTNL,
5370 }, 4857 },
5371}; 4858};
5372 4859
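
The ops table above is the heart of this hunk: each entry now declares its prerequisites (NL80211_FLAG_NEED_WIPHY / _NEED_NETDEV / _NEED_NETDEV_UP / _NEED_RTNL) and generic pre-/post-doit hooks perform the lookup, locking and cleanup that every handler previously open-coded; nl80211_post_doit at the top of this section is the release half. A minimal userspace sketch of the same pattern, assuming nothing beyond libc; FLAG_*, lookup_object() and take_lock() are illustrative stand-ins for the real flags and the rdev/RTNL helpers:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative flags; the real NL80211_FLAG_* values are private to
 * net/wireless/nl80211.c, and the helpers below stand in for the
 * rdev/netdev lookup and rtnl_lock()/rtnl_unlock(). */
#define FLAG_NEED_LOCK   0x01
#define FLAG_NEED_OBJECT 0x02

struct op {
	const char *name;
	unsigned int flags;
	int (*doit)(void *obj);
};

static void *lookup_object(void) { return malloc(1); }
static void put_object(void *obj) { free(obj); }
static void take_lock(void) { puts("lock"); }
static void drop_lock(void) { puts("unlock"); }

/* pre-doit and post-doit rolled into one dispatcher */
static int dispatch(const struct op *op)
{
	void *obj = NULL;
	int err = 0;

	if (op->flags & FLAG_NEED_LOCK)		/* pre: take the lock */
		take_lock();
	if (op->flags & FLAG_NEED_OBJECT) {	/* pre: resolve the object */
		obj = lookup_object();
		if (!obj) {
			err = -1;
			goto out;
		}
	}

	err = op->doit(obj);		/* handler runs with prereqs met */

	if (obj)			/* post: release in reverse order */
		put_object(obj);
out:
	if (op->flags & FLAG_NEED_LOCK)
		drop_lock();
	return err;
}

static int get_something(void *obj) { (void)obj; puts("doit"); return 0; }

int main(void)
{
	struct op get = { "GET", FLAG_NEED_LOCK | FLAG_NEED_OBJECT,
			  get_something };
	return dispatch(&get);
}

The payoff matches the table above: giving a command a new prerequisite is a one-line flags change instead of fresh boilerplate in its handler.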
@@ -6040,9 +5527,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
6040 nl80211_mlme_mcgrp.id, gfp); 5527 nl80211_mlme_mcgrp.id, gfp);
6041} 5528}
6042 5529
6043int nl80211_send_action(struct cfg80211_registered_device *rdev, 5530int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
6044 struct net_device *netdev, u32 nlpid, 5531 struct net_device *netdev, u32 nlpid,
6045 int freq, const u8 *buf, size_t len, gfp_t gfp) 5532 int freq, const u8 *buf, size_t len, gfp_t gfp)
6046{ 5533{
6047 struct sk_buff *msg; 5534 struct sk_buff *msg;
6048 void *hdr; 5535 void *hdr;
@@ -6052,7 +5539,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6052 if (!msg) 5539 if (!msg)
6053 return -ENOMEM; 5540 return -ENOMEM;
6054 5541
6055 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); 5542 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
6056 if (!hdr) { 5543 if (!hdr) {
6057 nlmsg_free(msg); 5544 nlmsg_free(msg);
6058 return -ENOMEM; 5545 return -ENOMEM;
@@ -6080,10 +5567,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
6080 return -ENOBUFS; 5567 return -ENOBUFS;
6081} 5568}
6082 5569
6083void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 5570void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
6084 struct net_device *netdev, u64 cookie, 5571 struct net_device *netdev, u64 cookie,
6085 const u8 *buf, size_t len, bool ack, 5572 const u8 *buf, size_t len, bool ack,
6086 gfp_t gfp) 5573 gfp_t gfp)
6087{ 5574{
6088 struct sk_buff *msg; 5575 struct sk_buff *msg;
6089 void *hdr; 5576 void *hdr;
@@ -6092,7 +5579,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
6092 if (!msg) 5579 if (!msg)
6093 return; 5580 return;
6094 5581
6095 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); 5582 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS);
6096 if (!hdr) { 5583 if (!hdr) {
6097 nlmsg_free(msg); 5584 nlmsg_free(msg);
6098 return; 5585 return;
@@ -6179,7 +5666,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
6179 5666
6180 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) 5667 list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
6181 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) 5668 list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
6182 cfg80211_mlme_unregister_actions(wdev, notify->pid); 5669 cfg80211_mlme_unregister_socket(wdev, notify->pid);
6183 5670
6184 rcu_read_unlock(); 5671 rcu_read_unlock();
6185 5672
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 2ad7fbc7d9f1..30d2f939150d 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
74 struct net_device *dev, const u8 *mac_addr, 74 struct net_device *dev, const u8 *mac_addr,
75 struct station_info *sinfo, gfp_t gfp); 75 struct station_info *sinfo, gfp_t gfp);
76 76
77int nl80211_send_action(struct cfg80211_registered_device *rdev, 77int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
78 struct net_device *netdev, u32 nlpid, int freq, 78 struct net_device *netdev, u32 nlpid, int freq,
79 const u8 *buf, size_t len, gfp_t gfp); 79 const u8 *buf, size_t len, gfp_t gfp);
80void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, 80void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
81 struct net_device *netdev, u64 cookie, 81 struct net_device *netdev, u64 cookie,
82 const u8 *buf, size_t len, bool ack, 82 const u8 *buf, size_t len, bool ack,
83 gfp_t gfp); 83 gfp_t gfp);
84 84
85void 85void
86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, 86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 1332c445d1c7..dbe35e138e94 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -14,6 +14,7 @@
14 * See COPYING for more details. 14 * See COPYING for more details.
15 */ 15 */
16 16
17#include <linux/kernel.h>
17#include <net/cfg80211.h> 18#include <net/cfg80211.h>
18#include <net/ieee80211_radiotap.h> 19#include <net/ieee80211_radiotap.h>
19#include <asm/unaligned.h> 20#include <asm/unaligned.h>
@@ -45,7 +46,7 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
45}; 46};
46 47
47static const struct ieee80211_radiotap_namespace radiotap_ns = { 48static const struct ieee80211_radiotap_namespace radiotap_ns = {
48 .n_bits = sizeof(rtap_namespace_sizes) / sizeof(rtap_namespace_sizes[0]), 49 .n_bits = ARRAY_SIZE(rtap_namespace_sizes),
49 .align_size = rtap_namespace_sizes, 50 .align_size = rtap_namespace_sizes,
50}; 51};
51 52
@@ -200,7 +201,7 @@ int ieee80211_radiotap_iterator_next(
200{ 201{
201 while (1) { 202 while (1) {
202 int hit = 0; 203 int hit = 0;
203 int pad, align, size, subns, vnslen; 204 int pad, align, size, subns;
204 uint32_t oui; 205 uint32_t oui;
205 206
206 /* if no more EXT bits, that's it */ 207 /* if no more EXT bits, that's it */
@@ -260,6 +261,27 @@ int ieee80211_radiotap_iterator_next(
260 if (pad) 261 if (pad)
261 iterator->_arg += align - pad; 262 iterator->_arg += align - pad;
262 263
264 if (iterator->_arg_index % 32 == IEEE80211_RADIOTAP_VENDOR_NAMESPACE) {
265 int vnslen;
266
267 if ((unsigned long)iterator->_arg + size -
268 (unsigned long)iterator->_rtheader >
269 (unsigned long)iterator->_max_length)
270 return -EINVAL;
271
272 oui = (*iterator->_arg << 16) |
273 (*(iterator->_arg + 1) << 8) |
274 *(iterator->_arg + 2);
275 subns = *(iterator->_arg + 3);
276
277 find_ns(iterator, oui, subns);
278
279 vnslen = get_unaligned_le16(iterator->_arg + 4);
280 iterator->_next_ns_data = iterator->_arg + size + vnslen;
281 if (!iterator->current_namespace)
282 size += vnslen;
283 }
284
263 /* 285 /*
264 * this is what we will return to user, but we need to 286 * this is what we will return to user, but we need to
265 * move on first so next call has something fresh to test 287 * move on first so next call has something fresh to test
@@ -286,40 +308,25 @@ int ieee80211_radiotap_iterator_next(
286 /* these special ones are valid in each bitmap word */ 308 /* these special ones are valid in each bitmap word */
287 switch (iterator->_arg_index % 32) { 309 switch (iterator->_arg_index % 32) {
288 case IEEE80211_RADIOTAP_VENDOR_NAMESPACE: 310 case IEEE80211_RADIOTAP_VENDOR_NAMESPACE:
289 iterator->_bitmap_shifter >>= 1;
290 iterator->_arg_index++;
291
292 iterator->_reset_on_ext = 1; 311 iterator->_reset_on_ext = 1;
293 312
294 vnslen = get_unaligned_le16(iterator->this_arg + 4);
295 iterator->_next_ns_data = iterator->_arg + vnslen;
296 oui = (*iterator->this_arg << 16) |
297 (*(iterator->this_arg + 1) << 8) |
298 *(iterator->this_arg + 2);
299 subns = *(iterator->this_arg + 3);
300
301 find_ns(iterator, oui, subns);
302
303 iterator->is_radiotap_ns = 0; 313 iterator->is_radiotap_ns = 0;
304 /* allow parsers to show this information */ 314 /*
315 * If parser didn't register this vendor
316 * namespace with us, allow it to show it
 317 * as 'raw'. To do that, set argument index
318 * to vendor namespace.
319 */
305 iterator->this_arg_index = 320 iterator->this_arg_index =
306 IEEE80211_RADIOTAP_VENDOR_NAMESPACE; 321 IEEE80211_RADIOTAP_VENDOR_NAMESPACE;
307 iterator->this_arg_size += vnslen; 322 if (!iterator->current_namespace)
308 if ((unsigned long)iterator->this_arg + 323 hit = 1;
309 iterator->this_arg_size - 324 goto next_entry;
310 (unsigned long)iterator->_rtheader >
311 (unsigned long)(unsigned long)iterator->_max_length)
312 return -EINVAL;
313 hit = 1;
314 break;
315 case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE: 325 case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE:
316 iterator->_bitmap_shifter >>= 1;
317 iterator->_arg_index++;
318
319 iterator->_reset_on_ext = 1; 326 iterator->_reset_on_ext = 1;
320 iterator->current_namespace = &radiotap_ns; 327 iterator->current_namespace = &radiotap_ns;
321 iterator->is_radiotap_ns = 1; 328 iterator->is_radiotap_ns = 1;
322 break; 329 goto next_entry;
323 case IEEE80211_RADIOTAP_EXT: 330 case IEEE80211_RADIOTAP_EXT:
324 /* 331 /*
325 * bit 31 was set, there is more 332 * bit 31 was set, there is more
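
The radiotap rework above moves vendor-namespace handling up to where the argument size is known, so the OUI/sub-namespace/length header is checked against _max_length before anything dereferences it. A self-contained sketch of that bounds discipline, assuming the 6-byte header layout (3-byte OUI, 1-byte sub-namespace, little-endian 16-bit skip length); parse_vendor_ns() is an illustrative helper, not a kernel function:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Validate that the whole vendor-namespace header, and then its
 * payload, fit inside the buffer before reading either. */
static int parse_vendor_ns(const uint8_t *buf, size_t buflen, size_t off,
			   uint32_t *oui, uint8_t *subns, uint16_t *vnslen)
{
	if (off > buflen || buflen - off < 6)	/* header must fit */
		return -1;

	*oui = (uint32_t)buf[off] << 16 | buf[off + 1] << 8 | buf[off + 2];
	*subns = buf[off + 3];
	*vnslen = (uint16_t)(buf[off + 4] | buf[off + 5] << 8); /* LE16 */

	if (*vnslen > buflen - off - 6)		/* payload must fit too */
		return -1;
	return 0;
}

int main(void)
{
	const uint8_t hdr[] = { 0x00, 0x11, 0x22, 0x01, 0x04, 0x00,
				0xde, 0xad, 0xbe, 0xef };
	uint32_t oui;
	uint8_t subns;
	uint16_t vnslen;

	if (parse_vendor_ns(hdr, sizeof(hdr), 0, &oui, &subns, &vnslen))
		return 1;
	printf("oui=%06x subns=%u len=%u\n", oui, subns, vnslen);
	return 0;
}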
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f180db0de66c..4b9f8912526c 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -36,6 +36,7 @@
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/random.h> 38#include <linux/random.h>
39#include <linux/ctype.h>
39#include <linux/nl80211.h> 40#include <linux/nl80211.h>
40#include <linux/platform_device.h> 41#include <linux/platform_device.h>
41#include <net/cfg80211.h> 42#include <net/cfg80211.h>
@@ -73,7 +74,11 @@ const struct ieee80211_regdomain *cfg80211_regdomain;
73 * - last_request 74 * - last_request
74 */ 75 */
75static DEFINE_MUTEX(reg_mutex); 76static DEFINE_MUTEX(reg_mutex);
76#define assert_reg_lock() WARN_ON(!mutex_is_locked(&reg_mutex)) 77
78static inline void assert_reg_lock(void)
79{
80 lockdep_assert_held(&reg_mutex);
81}
77 82
78/* Used to queue up regulatory hints */ 83/* Used to queue up regulatory hints */
79static LIST_HEAD(reg_requests_list); 84static LIST_HEAD(reg_requests_list);
@@ -181,14 +186,6 @@ static bool is_alpha2_set(const char *alpha2)
181 return false; 186 return false;
182} 187}
183 188
184static bool is_alpha_upper(char letter)
185{
186 /* ASCII A - Z */
187 if (letter >= 65 && letter <= 90)
188 return true;
189 return false;
190}
191
192static bool is_unknown_alpha2(const char *alpha2) 189static bool is_unknown_alpha2(const char *alpha2)
193{ 190{
194 if (!alpha2) 191 if (!alpha2)
@@ -220,7 +217,7 @@ static bool is_an_alpha2(const char *alpha2)
220{ 217{
221 if (!alpha2) 218 if (!alpha2)
222 return false; 219 return false;
223 if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1])) 220 if (isalpha(alpha2[0]) && isalpha(alpha2[1]))
224 return true; 221 return true;
225 return false; 222 return false;
226} 223}
@@ -1170,7 +1167,7 @@ static int ignore_request(struct wiphy *wiphy,
1170 return 0; 1167 return 0;
1171 return -EALREADY; 1168 return -EALREADY;
1172 } 1169 }
1173 return REG_INTERSECT; 1170 return 0;
1174 case NL80211_REGDOM_SET_BY_DRIVER: 1171 case NL80211_REGDOM_SET_BY_DRIVER:
1175 if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { 1172 if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) {
1176 if (regdom_changes(pending_request->alpha2)) 1173 if (regdom_changes(pending_request->alpha2))
@@ -1399,6 +1396,11 @@ static DECLARE_WORK(reg_work, reg_todo);
1399 1396
1400static void queue_regulatory_request(struct regulatory_request *request) 1397static void queue_regulatory_request(struct regulatory_request *request)
1401{ 1398{
1399 if (isalpha(request->alpha2[0]))
1400 request->alpha2[0] = toupper(request->alpha2[0]);
1401 if (isalpha(request->alpha2[1]))
1402 request->alpha2[1] = toupper(request->alpha2[1]);
1403
1402 spin_lock(&reg_requests_lock); 1404 spin_lock(&reg_requests_lock);
1403 list_add_tail(&request->list, &reg_requests_list); 1405 list_add_tail(&request->list, &reg_requests_list);
1404 spin_unlock(&reg_requests_lock); 1406 spin_unlock(&reg_requests_lock);
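
queue_regulatory_request() now canonicalizes the country code before it enters the shared request list, so later lookups never see a mixed-case alpha2; isalpha()/toupper() come from the newly included linux/ctype.h. A minimal sketch of the same normalization with plain libc ctype, which behaves identically for ASCII:

#include <ctype.h>
#include <stdio.h>

/* Uppercase each alpha2 byte so "us" and "US" name the same
 * regulatory domain. */
static void normalize_alpha2(char alpha2[2])
{
	int i;

	for (i = 0; i < 2; i++)
		if (isalpha((unsigned char)alpha2[i]))
			alpha2[i] = (char)toupper((unsigned char)alpha2[i]);
}

int main(void)
{
	char cc[2] = { 'u', 's' };

	normalize_alpha2(cc);
	printf("%c%c\n", cc[0], cc[1]);	/* US */
	return 0;
}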
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 5ca8c7180141..503ebb86ba18 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -650,14 +650,14 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
650 bss = container_of(pub, struct cfg80211_internal_bss, pub); 650 bss = container_of(pub, struct cfg80211_internal_bss, pub);
651 651
652 spin_lock_bh(&dev->bss_lock); 652 spin_lock_bh(&dev->bss_lock);
653 if (!list_empty(&bss->list)) {
654 list_del_init(&bss->list);
655 dev->bss_generation++;
656 rb_erase(&bss->rbn, &dev->bss_tree);
653 657
654 list_del(&bss->list); 658 kref_put(&bss->ref, bss_release);
655 dev->bss_generation++; 659 }
656 rb_erase(&bss->rbn, &dev->bss_tree);
657
658 spin_unlock_bh(&dev->bss_lock); 660 spin_unlock_bh(&dev->bss_lock);
659
660 kref_put(&bss->ref, bss_release);
661} 661}
662EXPORT_SYMBOL(cfg80211_unlink_bss); 662EXPORT_SYMBOL(cfg80211_unlink_bss);
663 663
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a8c2d6b877ae..e17b0bee6bdc 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -411,7 +411,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
411 411
412 ASSERT_WDEV_LOCK(wdev); 412 ASSERT_WDEV_LOCK(wdev);
413 413
414 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 414 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
415 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
415 return; 416 return;
416 417
417 if (wdev->sme_state != CFG80211_SME_CONNECTING) 418 if (wdev->sme_state != CFG80211_SME_CONNECTING)
@@ -548,7 +549,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid,
548 549
549 ASSERT_WDEV_LOCK(wdev); 550 ASSERT_WDEV_LOCK(wdev);
550 551
551 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 552 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
553 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
552 return; 554 return;
553 555
554 if (wdev->sme_state != CFG80211_SME_CONNECTED) 556 if (wdev->sme_state != CFG80211_SME_CONNECTED)
@@ -644,7 +646,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
644 646
645 ASSERT_WDEV_LOCK(wdev); 647 ASSERT_WDEV_LOCK(wdev);
646 648
647 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) 649 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
650 wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
648 return; 651 return;
649 652
650 if (wdev->sme_state != CFG80211_SME_CONNECTED) 653 if (wdev->sme_state != CFG80211_SME_CONNECTED)
@@ -695,7 +698,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
695 */ 698 */
696 if (rdev->ops->del_key) 699 if (rdev->ops->del_key)
697 for (i = 0; i < 6; i++) 700 for (i = 0; i < 6; i++)
698 rdev->ops->del_key(wdev->wiphy, dev, i, NULL); 701 rdev->ops->del_key(wdev->wiphy, dev, i, false, NULL);
699 702
700#ifdef CONFIG_CFG80211_WEXT 703#ifdef CONFIG_CFG80211_WEXT
701 memset(&wrqu, 0, sizeof(wrqu)); 704 memset(&wrqu, 0, sizeof(wrqu));
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9f2cef3e0ca0..4294fa22bb2d 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -35,6 +35,14 @@ SHOW_FMT(index, "%d", wiphy_idx);
35SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); 35SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
36SHOW_FMT(address_mask, "%pM", wiphy.addr_mask); 36SHOW_FMT(address_mask, "%pM", wiphy.addr_mask);
37 37
38static ssize_t name_show(struct device *dev,
39 struct device_attribute *attr,
40 char *buf) {
41 struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy;
42 return sprintf(buf, "%s\n", dev_name(&wiphy->dev));
43}
44
45
38static ssize_t addresses_show(struct device *dev, 46static ssize_t addresses_show(struct device *dev,
39 struct device_attribute *attr, 47 struct device_attribute *attr,
40 char *buf) 48 char *buf)
@@ -57,6 +65,7 @@ static struct device_attribute ieee80211_dev_attrs[] = {
57 __ATTR_RO(macaddress), 65 __ATTR_RO(macaddress),
58 __ATTR_RO(address_mask), 66 __ATTR_RO(address_mask),
59 __ATTR_RO(addresses), 67 __ATTR_RO(addresses),
68 __ATTR_RO(name),
60 {} 69 {}
61}; 70};
62 71
@@ -110,6 +119,13 @@ static int wiphy_resume(struct device *dev)
110 return ret; 119 return ret;
111} 120}
112 121
122static const void *wiphy_namespace(struct device *d)
123{
124 struct wiphy *wiphy = container_of(d, struct wiphy, dev);
125
126 return wiphy_net(wiphy);
127}
128
113struct class ieee80211_class = { 129struct class ieee80211_class = {
114 .name = "ieee80211", 130 .name = "ieee80211",
115 .owner = THIS_MODULE, 131 .owner = THIS_MODULE,
@@ -120,6 +136,8 @@ struct class ieee80211_class = {
120#endif 136#endif
121 .suspend = wiphy_suspend, 137 .suspend = wiphy_suspend,
122 .resume = wiphy_resume, 138 .resume = wiphy_resume,
139 .ns_type = &net_ns_type_operations,
140 .namespace = wiphy_namespace,
123}; 141};
124 142
125int wiphy_sysfs_init(void) 143int wiphy_sysfs_init(void)
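
wiphy_namespace() above uses container_of() to recover the wiphy from the embedded struct device handed to the class code, which is what lets the ieee80211 class report a per-wiphy network namespace. A freestanding sketch of the idiom with toy types; the macro mirrors the kernel's definition in spirit:

#include <stddef.h>
#include <stdio.h>

/* Recover the enclosing structure from a pointer to one of its
 * members. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct device { int id; };
struct wiphy {
	const char *name;
	struct device dev;		/* embedded, as in the kernel */
};

static const char *wiphy_name_from_dev(struct device *d)
{
	struct wiphy *w = container_of(d, struct wiphy, dev);

	return w->name;
}

int main(void)
{
	struct wiphy w = { "phy0", { 0 } };

	puts(wiphy_name_from_dev(&w.dev));	/* phy0 */
	return 0;
}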
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 0c8a1e8b7690..76120aeda57d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -144,19 +144,25 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy)
144 144
145int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, 145int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
146 struct key_params *params, int key_idx, 146 struct key_params *params, int key_idx,
147 const u8 *mac_addr) 147 bool pairwise, const u8 *mac_addr)
148{ 148{
149 int i; 149 int i;
150 150
151 if (key_idx > 5) 151 if (key_idx > 5)
152 return -EINVAL; 152 return -EINVAL;
153 153
154 if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
155 return -EINVAL;
156
157 if (pairwise && !mac_addr)
158 return -EINVAL;
159
154 /* 160 /*
155 * Disallow pairwise keys with non-zero index unless it's WEP 161 * Disallow pairwise keys with non-zero index unless it's WEP
156 * (because current deployments use pairwise WEP keys with 162 * (because current deployments use pairwise WEP keys with
 157 * non-zero indices but 802.11i clearly specifies to use zero) 163 * non-zero indices but 802.11i clearly specifies to use zero)
158 */ 164 */
159 if (mac_addr && key_idx && 165 if (pairwise && key_idx &&
160 params->cipher != WLAN_CIPHER_SUITE_WEP40 && 166 params->cipher != WLAN_CIPHER_SUITE_WEP40 &&
161 params->cipher != WLAN_CIPHER_SUITE_WEP104) 167 params->cipher != WLAN_CIPHER_SUITE_WEP104)
162 return -EINVAL; 168 return -EINVAL;
@@ -183,7 +189,14 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
183 return -EINVAL; 189 return -EINVAL;
184 break; 190 break;
185 default: 191 default:
186 return -EINVAL; 192 /*
193 * We don't know anything about this algorithm,
 194 * so allow using it -- but the driver must check
195 * all parameters! We still check below whether
196 * or not the driver supports this algorithm,
197 * of course.
198 */
199 break;
187 } 200 }
188 201
189 if (params->seq) { 202 if (params->seq) {
@@ -221,7 +234,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) =
221 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 234 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
222EXPORT_SYMBOL(bridge_tunnel_header); 235EXPORT_SYMBOL(bridge_tunnel_header);
223 236
224unsigned int ieee80211_hdrlen(__le16 fc) 237unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc)
225{ 238{
226 unsigned int hdrlen = 24; 239 unsigned int hdrlen = 24;
227 240
@@ -319,7 +332,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
319 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 332 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
320 case cpu_to_le16(IEEE80211_FCTL_TODS): 333 case cpu_to_le16(IEEE80211_FCTL_TODS):
321 if (unlikely(iftype != NL80211_IFTYPE_AP && 334 if (unlikely(iftype != NL80211_IFTYPE_AP &&
322 iftype != NL80211_IFTYPE_AP_VLAN)) 335 iftype != NL80211_IFTYPE_AP_VLAN &&
336 iftype != NL80211_IFTYPE_P2P_GO))
323 return -1; 337 return -1;
324 break; 338 break;
325 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): 339 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
@@ -347,7 +361,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
347 break; 361 break;
348 case cpu_to_le16(IEEE80211_FCTL_FROMDS): 362 case cpu_to_le16(IEEE80211_FCTL_FROMDS):
349 if ((iftype != NL80211_IFTYPE_STATION && 363 if ((iftype != NL80211_IFTYPE_STATION &&
350 iftype != NL80211_IFTYPE_MESH_POINT) || 364 iftype != NL80211_IFTYPE_P2P_CLIENT &&
365 iftype != NL80211_IFTYPE_MESH_POINT) ||
351 (is_multicast_ether_addr(dst) && 366 (is_multicast_ether_addr(dst) &&
352 !compare_ether_addr(src, addr))) 367 !compare_ether_addr(src, addr)))
353 return -1; 368 return -1;
@@ -424,6 +439,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
424 switch (iftype) { 439 switch (iftype) {
425 case NL80211_IFTYPE_AP: 440 case NL80211_IFTYPE_AP:
426 case NL80211_IFTYPE_AP_VLAN: 441 case NL80211_IFTYPE_AP_VLAN:
442 case NL80211_IFTYPE_P2P_GO:
427 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); 443 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
428 /* DA BSSID SA */ 444 /* DA BSSID SA */
429 memcpy(hdr.addr1, skb->data, ETH_ALEN); 445 memcpy(hdr.addr1, skb->data, ETH_ALEN);
@@ -432,6 +448,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
432 hdrlen = 24; 448 hdrlen = 24;
433 break; 449 break;
434 case NL80211_IFTYPE_STATION: 450 case NL80211_IFTYPE_STATION:
451 case NL80211_IFTYPE_P2P_CLIENT:
435 fc |= cpu_to_le16(IEEE80211_FCTL_TODS); 452 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
436 /* BSSID SA DA */ 453 /* BSSID SA DA */
437 memcpy(hdr.addr1, bssid, ETH_ALEN); 454 memcpy(hdr.addr1, bssid, ETH_ALEN);
@@ -666,7 +683,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
666 for (i = 0; i < 6; i++) { 683 for (i = 0; i < 6; i++) {
667 if (!wdev->connect_keys->params[i].cipher) 684 if (!wdev->connect_keys->params[i].cipher)
668 continue; 685 continue;
669 if (rdev->ops->add_key(wdev->wiphy, dev, i, NULL, 686 if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL,
670 &wdev->connect_keys->params[i])) { 687 &wdev->connect_keys->params[i])) {
671 printk(KERN_ERR "%s: failed to set key %d\n", 688 printk(KERN_ERR "%s: failed to set key %d\n",
672 dev->name, i); 689 dev->name, i);
@@ -771,7 +788,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
771 788
772 /* if it's part of a bridge, reject changing type to station/ibss */ 789 /* if it's part of a bridge, reject changing type to station/ibss */
773 if ((dev->priv_flags & IFF_BRIDGE_PORT) && 790 if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
774 (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION)) 791 (ntype == NL80211_IFTYPE_ADHOC ||
792 ntype == NL80211_IFTYPE_STATION ||
793 ntype == NL80211_IFTYPE_P2P_CLIENT))
775 return -EBUSY; 794 return -EBUSY;
776 795
777 if (ntype != otype) { 796 if (ntype != otype) {
@@ -782,6 +801,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
782 cfg80211_leave_ibss(rdev, dev, false); 801 cfg80211_leave_ibss(rdev, dev, false);
783 break; 802 break;
784 case NL80211_IFTYPE_STATION: 803 case NL80211_IFTYPE_STATION:
804 case NL80211_IFTYPE_P2P_CLIENT:
785 cfg80211_disconnect(rdev, dev, 805 cfg80211_disconnect(rdev, dev,
786 WLAN_REASON_DEAUTH_LEAVING, true); 806 WLAN_REASON_DEAUTH_LEAVING, true);
787 break; 807 break;
@@ -810,9 +830,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
810 if (dev->ieee80211_ptr->use_4addr) 830 if (dev->ieee80211_ptr->use_4addr)
811 break; 831 break;
812 /* fall through */ 832 /* fall through */
833 case NL80211_IFTYPE_P2P_CLIENT:
813 case NL80211_IFTYPE_ADHOC: 834 case NL80211_IFTYPE_ADHOC:
814 dev->priv_flags |= IFF_DONT_BRIDGE; 835 dev->priv_flags |= IFF_DONT_BRIDGE;
815 break; 836 break;
837 case NL80211_IFTYPE_P2P_GO:
816 case NL80211_IFTYPE_AP: 838 case NL80211_IFTYPE_AP:
817 case NL80211_IFTYPE_AP_VLAN: 839 case NL80211_IFTYPE_AP_VLAN:
818 case NL80211_IFTYPE_WDS: 840 case NL80211_IFTYPE_WDS:
@@ -823,7 +845,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
823 /* monitor can't bridge anyway */ 845 /* monitor can't bridge anyway */
824 break; 846 break;
825 case NL80211_IFTYPE_UNSPECIFIED: 847 case NL80211_IFTYPE_UNSPECIFIED:
826 case __NL80211_IFTYPE_AFTER_LAST: 848 case NUM_NL80211_IFTYPES:
827 /* not happening */ 849 /* not happening */
828 break; 850 break;
829 } 851 }
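
cfg80211_validate_key_settings() now takes an explicit pairwise flag instead of inferring it from mac_addr, and rejects the two inconsistent combinations up front: a pairwise key without a peer address, and a group key with a peer address on hardware without IBSS RSN support. A sketch of just that precondition check; SUPPORTS_IBSS_RSN is an illustrative stand-in for WIPHY_FLAG_IBSS_RSN:

#include <stdbool.h>
#include <stdio.h>

#define SUPPORTS_IBSS_RSN 0x1

static int check_key_target(bool pairwise, const unsigned char *mac_addr,
			    unsigned int wiphy_flags)
{
	if (pairwise && !mac_addr)
		return -1;	/* -EINVAL in the kernel */
	if (!pairwise && mac_addr && !(wiphy_flags & SUPPORTS_IBSS_RSN))
		return -1;	/* group key to a peer: IBSS RSN only */
	return 0;
}

int main(void)
{
	const unsigned char peer[6] = { 2, 0, 0, 0, 0, 1 };

	printf("%d\n", check_key_target(true, NULL, 0));	/* -1 */
	printf("%d\n", check_key_target(true, peer, 0));	/*  0 */
	printf("%d\n", check_key_target(false, peer, 0));	/* -1 */
	printf("%d\n", check_key_target(false, peer, SUPPORTS_IBSS_RSN));
	return 0;
}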
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7e5c3a45f811..12222ee6ebf2 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -432,14 +432,17 @@ int cfg80211_wext_giwretry(struct net_device *dev,
432EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry); 432EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry);
433 433
434static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, 434static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
435 struct net_device *dev, const u8 *addr, 435 struct net_device *dev, bool pairwise,
436 bool remove, bool tx_key, int idx, 436 const u8 *addr, bool remove, bool tx_key,
437 struct key_params *params) 437 int idx, struct key_params *params)
438{ 438{
439 struct wireless_dev *wdev = dev->ieee80211_ptr; 439 struct wireless_dev *wdev = dev->ieee80211_ptr;
440 int err, i; 440 int err, i;
441 bool rejoin = false; 441 bool rejoin = false;
442 442
443 if (pairwise && !addr)
444 return -EINVAL;
445
443 if (!wdev->wext.keys) { 446 if (!wdev->wext.keys) {
444 wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), 447 wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys),
445 GFP_KERNEL); 448 GFP_KERNEL);
@@ -478,7 +481,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
478 __cfg80211_leave_ibss(rdev, wdev->netdev, true); 481 __cfg80211_leave_ibss(rdev, wdev->netdev, true);
479 rejoin = true; 482 rejoin = true;
480 } 483 }
481 err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); 484
485 if (!pairwise && addr &&
486 !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
487 err = -ENOENT;
488 else
489 err = rdev->ops->del_key(&rdev->wiphy, dev, idx,
490 pairwise, addr);
482 } 491 }
483 wdev->wext.connect.privacy = false; 492 wdev->wext.connect.privacy = false;
484 /* 493 /*
@@ -507,12 +516,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
507 if (addr) 516 if (addr)
508 tx_key = false; 517 tx_key = false;
509 518
510 if (cfg80211_validate_key_settings(rdev, params, idx, addr)) 519 if (cfg80211_validate_key_settings(rdev, params, idx, pairwise, addr))
511 return -EINVAL; 520 return -EINVAL;
512 521
513 err = 0; 522 err = 0;
514 if (wdev->current_bss) 523 if (wdev->current_bss)
515 err = rdev->ops->add_key(&rdev->wiphy, dev, idx, addr, params); 524 err = rdev->ops->add_key(&rdev->wiphy, dev, idx,
525 pairwise, addr, params);
516 if (err) 526 if (err)
517 return err; 527 return err;
518 528
@@ -563,17 +573,17 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
563} 573}
564 574
565static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, 575static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
566 struct net_device *dev, const u8 *addr, 576 struct net_device *dev, bool pairwise,
567 bool remove, bool tx_key, int idx, 577 const u8 *addr, bool remove, bool tx_key,
568 struct key_params *params) 578 int idx, struct key_params *params)
569{ 579{
570 int err; 580 int err;
571 581
572 /* devlist mutex needed for possible IBSS re-join */ 582 /* devlist mutex needed for possible IBSS re-join */
573 mutex_lock(&rdev->devlist_mtx); 583 mutex_lock(&rdev->devlist_mtx);
574 wdev_lock(dev->ieee80211_ptr); 584 wdev_lock(dev->ieee80211_ptr);
575 err = __cfg80211_set_encryption(rdev, dev, addr, remove, 585 err = __cfg80211_set_encryption(rdev, dev, pairwise, addr,
576 tx_key, idx, params); 586 remove, tx_key, idx, params);
577 wdev_unlock(dev->ieee80211_ptr); 587 wdev_unlock(dev->ieee80211_ptr);
578 mutex_unlock(&rdev->devlist_mtx); 588 mutex_unlock(&rdev->devlist_mtx);
579 589
@@ -635,7 +645,7 @@ int cfg80211_wext_siwencode(struct net_device *dev,
635 else if (!remove) 645 else if (!remove)
636 return -EINVAL; 646 return -EINVAL;
637 647
638 return cfg80211_set_encryption(rdev, dev, NULL, remove, 648 return cfg80211_set_encryption(rdev, dev, false, NULL, remove,
639 wdev->wext.default_key == -1, 649 wdev->wext.default_key == -1,
640 idx, &params); 650 idx, &params);
641} 651}
@@ -725,7 +735,9 @@ int cfg80211_wext_siwencodeext(struct net_device *dev,
725 } 735 }
726 736
727 return cfg80211_set_encryption( 737 return cfg80211_set_encryption(
728 rdev, dev, addr, remove, 738 rdev, dev,
739 !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY),
740 addr, remove,
729 ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, 741 ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY,
730 idx, &params); 742 idx, &params);
731} 743}
@@ -1354,6 +1366,10 @@ struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
1354 } 1366 }
1355 1367
1356 wstats.qual.updated |= IW_QUAL_NOISE_INVALID; 1368 wstats.qual.updated |= IW_QUAL_NOISE_INVALID;
1369 if (sinfo.filled & STATION_INFO_RX_DROP_MISC)
1370 wstats.discard.misc = sinfo.rx_dropped_misc;
1371 if (sinfo.filled & STATION_INFO_TX_FAILED)
1372 wstats.discard.retries = sinfo.tx_failed;
1357 1373
1358 return &wstats; 1374 return &wstats;
1359} 1375}
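
The wireless-stats hunk above follows the sinfo.filled convention: each STATION_INFO_* bit says whether the driver filled in the corresponding member, and the reader copies a field only when its bit is set. A minimal sketch of the convention; the bit names and struct are illustrative, not the real constants:

#include <stdio.h>

#define STAT_RX_DROP_MISC 0x1
#define STAT_TX_FAILED    0x2

struct toy_stats {
	unsigned int filled;		/* which members below are valid */
	unsigned int rx_dropped_misc;
	unsigned int tx_failed;
};

int main(void)
{
	struct toy_stats s = { .filled = STAT_TX_FAILED, .tx_failed = 7 };
	unsigned int discard_misc = 0, discard_retries = 0;

	if (s.filled & STAT_RX_DROP_MISC)
		discard_misc = s.rx_dropped_misc;
	if (s.filled & STAT_TX_FAILED)
		discard_retries = s.tx_failed;

	printf("misc=%u retries=%u\n", discard_misc, discard_retries);
	return 0;
}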
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 8f5116f5af19..dc675a3daa3d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -611,7 +611,7 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
611#endif 611#endif
612 612
613#ifdef CONFIG_CFG80211_WEXT 613#ifdef CONFIG_CFG80211_WEXT
614 if (dev->ieee80211_ptr && dev->ieee80211_ptr && 614 if (dev->ieee80211_ptr &&
615 dev->ieee80211_ptr->wiphy && 615 dev->ieee80211_ptr->wiphy &&
616 dev->ieee80211_ptr->wiphy->wext && 616 dev->ieee80211_ptr->wiphy->wext &&
617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) 617 dev->ieee80211_ptr->wiphy->wext->get_wireless_stats)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 9818198add8a..6fffe62d7c25 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -197,6 +197,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
197 wdev->wext.connect.ssid_len = len; 197 wdev->wext.connect.ssid_len = len;
198 198
199 wdev->wext.connect.crypto.control_port = false; 199 wdev->wext.connect.crypto.control_port = false;
200 wdev->wext.connect.crypto.control_port_ethertype =
201 cpu_to_be16(ETH_P_PAE);
200 202
201 err = cfg80211_mgd_wext_connect(rdev, wdev); 203 err = cfg80211_mgd_wext_connect(rdev, wdev);
202 out: 204 out:
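
The wext-sme fix stops leaving control_port_ethertype zeroed when wext builds the connect parameters; 0x888E (EAPOL) is the 802.1X default, and the field is stored big-endian, hence cpu_to_be16(). A one-line userspace equivalent using htons(), which performs the same CPU-to-big-endian conversion:

#include <arpa/inet.h>
#include <stdio.h>

#define ETH_P_PAE 0x888E	/* EAPOL ethertype, as in linux/if_ether.h */

int main(void)
{
	/* htons() is the userspace spelling of cpu_to_be16() */
	unsigned short ethertype = htons(ETH_P_PAE);

	printf("0x%04x\n", ntohs(ethertype));	/* 0x888e */
	return 0;
}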
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index e6759c9660bb..2196e55e4f61 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -5,6 +5,7 @@
5config X25 5config X25
6 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)" 6 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
7 depends on EXPERIMENTAL 7 depends on EXPERIMENTAL
8 depends on BKL # should be fixable
8 ---help--- 9 ---help---
9 X.25 is a set of standardized network protocols, similar in scope to 10 X.25 is a set of standardized network protocols, similar in scope to
10 frame relay; the one physical line from your box to the X.25 network 11 frame relay; the one physical line from your box to the X.25 network
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5e86d4e97dce..f7af98dff409 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -507,14 +507,14 @@ static int x25_listen(struct socket *sock, int backlog)
507 struct sock *sk = sock->sk; 507 struct sock *sk = sock->sk;
508 int rc = -EOPNOTSUPP; 508 int rc = -EOPNOTSUPP;
509 509
510 lock_kernel(); 510 lock_sock(sk);
511 if (sk->sk_state != TCP_LISTEN) { 511 if (sk->sk_state != TCP_LISTEN) {
512 memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); 512 memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
513 sk->sk_max_ack_backlog = backlog; 513 sk->sk_max_ack_backlog = backlog;
514 sk->sk_state = TCP_LISTEN; 514 sk->sk_state = TCP_LISTEN;
515 rc = 0; 515 rc = 0;
516 } 516 }
517 unlock_kernel(); 517 release_sock(sk);
518 518
519 return rc; 519 return rc;
520} 520}
@@ -688,7 +688,6 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
688 struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; 688 struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
689 int len, i, rc = 0; 689 int len, i, rc = 0;
690 690
691 lock_kernel();
692 if (!sock_flag(sk, SOCK_ZAPPED) || 691 if (!sock_flag(sk, SOCK_ZAPPED) ||
693 addr_len != sizeof(struct sockaddr_x25) || 692 addr_len != sizeof(struct sockaddr_x25) ||
694 addr->sx25_family != AF_X25) { 693 addr->sx25_family != AF_X25) {
@@ -704,12 +703,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
704 } 703 }
705 } 704 }
706 705
706 lock_sock(sk);
707 x25_sk(sk)->source_addr = addr->sx25_addr; 707 x25_sk(sk)->source_addr = addr->sx25_addr;
708 x25_insert_socket(sk); 708 x25_insert_socket(sk);
709 sock_reset_flag(sk, SOCK_ZAPPED); 709 sock_reset_flag(sk, SOCK_ZAPPED);
710 release_sock(sk);
710 SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); 711 SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
711out: 712out:
712 unlock_kernel();
713 return rc; 713 return rc;
714} 714}
715 715
@@ -751,7 +751,6 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
751 struct x25_route *rt; 751 struct x25_route *rt;
752 int rc = 0; 752 int rc = 0;
753 753
754 lock_kernel();
755 lock_sock(sk); 754 lock_sock(sk);
756 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { 755 if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
757 sock->state = SS_CONNECTED; 756 sock->state = SS_CONNECTED;
@@ -829,7 +828,6 @@ out_put_route:
829 x25_route_put(rt); 828 x25_route_put(rt);
830out: 829out:
831 release_sock(sk); 830 release_sock(sk);
832 unlock_kernel();
833 return rc; 831 return rc;
834} 832}
835 833
@@ -869,8 +867,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
869 struct sk_buff *skb; 867 struct sk_buff *skb;
870 int rc = -EINVAL; 868 int rc = -EINVAL;
871 869
872 lock_kernel(); 870 if (!sk)
873 if (!sk || sk->sk_state != TCP_LISTEN)
874 goto out; 871 goto out;
875 872
876 rc = -EOPNOTSUPP; 873 rc = -EOPNOTSUPP;
@@ -878,6 +875,10 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
878 goto out; 875 goto out;
879 876
880 lock_sock(sk); 877 lock_sock(sk);
878 rc = -EINVAL;
879 if (sk->sk_state != TCP_LISTEN)
880 goto out2;
881
881 rc = x25_wait_for_data(sk, sk->sk_rcvtimeo); 882 rc = x25_wait_for_data(sk, sk->sk_rcvtimeo);
882 if (rc) 883 if (rc)
883 goto out2; 884 goto out2;
@@ -897,7 +898,6 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
897out2: 898out2:
898 release_sock(sk); 899 release_sock(sk);
899out: 900out:
900 unlock_kernel();
901 return rc; 901 return rc;
902} 902}
903 903
@@ -909,7 +909,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
909 struct x25_sock *x25 = x25_sk(sk); 909 struct x25_sock *x25 = x25_sk(sk);
910 int rc = 0; 910 int rc = 0;
911 911
912 lock_kernel();
913 if (peer) { 912 if (peer) {
914 if (sk->sk_state != TCP_ESTABLISHED) { 913 if (sk->sk_state != TCP_ESTABLISHED) {
915 rc = -ENOTCONN; 914 rc = -ENOTCONN;
@@ -923,19 +922,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
923 *uaddr_len = sizeof(*sx25); 922 *uaddr_len = sizeof(*sx25);
924 923
925out: 924out:
926 unlock_kernel();
927 return rc;
928}
929
930static unsigned int x25_datagram_poll(struct file *file, struct socket *sock,
931 poll_table *wait)
932{
933 int rc;
934
935 lock_kernel();
936 rc = datagram_poll(file, sock, wait);
937 unlock_kernel();
938
939 return rc; 925 return rc;
940} 926}
941 927
@@ -1746,7 +1732,7 @@ static const struct proto_ops x25_proto_ops = {
1746 .socketpair = sock_no_socketpair, 1732 .socketpair = sock_no_socketpair,
1747 .accept = x25_accept, 1733 .accept = x25_accept,
1748 .getname = x25_getname, 1734 .getname = x25_getname,
1749 .poll = x25_datagram_poll, 1735 .poll = datagram_poll,
1750 .ioctl = x25_ioctl, 1736 .ioctl = x25_ioctl,
1751#ifdef CONFIG_COMPAT 1737#ifdef CONFIG_COMPAT
1752 .compat_ioctl = compat_x25_ioctl, 1738 .compat_ioctl = compat_x25_ioctl,
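
The af_x25.c hunks are a straight BKL-removal conversion: every lock_kernel()/unlock_kernel() pair guarding per-socket state becomes lock_sock()/release_sock() on that socket, so two X.25 sockets no longer serialize against each other, and pure wrappers such as x25_datagram_poll() disappear entirely. A toy sketch of the resulting shape, with a per-object pthread mutex in place of lock_sock():

#include <pthread.h>
#include <stdio.h>

struct toy_sock {
	pthread_mutex_t lock;	/* per-socket, not global */
	int listening;
};

static int toy_listen(struct toy_sock *sk)
{
	int rc = -1;

	pthread_mutex_lock(&sk->lock);		/* was: lock_kernel() */
	if (!sk->listening) {
		sk->listening = 1;
		rc = 0;
	}
	pthread_mutex_unlock(&sk->lock);	/* was: unlock_kernel() */
	return rc;
}

int main(void)
{
	struct toy_sock sk = { PTHREAD_MUTEX_INITIALIZER, 0 };
	int first = toy_listen(&sk);
	int second = toy_listen(&sk);		/* already listening */

	printf("%d %d\n", first, second);	/* 0 -1 */
	return 0;
}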
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 771bab00754b..55187c8f6420 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -61,6 +61,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
61 while (len > 0) { 61 while (len > 0) {
62 switch (*p & X25_FAC_CLASS_MASK) { 62 switch (*p & X25_FAC_CLASS_MASK) {
63 case X25_FAC_CLASS_A: 63 case X25_FAC_CLASS_A:
64 if (len < 2)
65 return 0;
64 switch (*p) { 66 switch (*p) {
65 case X25_FAC_REVERSE: 67 case X25_FAC_REVERSE:
66 if((p[1] & 0x81) == 0x81) { 68 if((p[1] & 0x81) == 0x81) {
@@ -104,6 +106,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
104 len -= 2; 106 len -= 2;
105 break; 107 break;
106 case X25_FAC_CLASS_B: 108 case X25_FAC_CLASS_B:
109 if (len < 3)
110 return 0;
107 switch (*p) { 111 switch (*p) {
108 case X25_FAC_PACKET_SIZE: 112 case X25_FAC_PACKET_SIZE:
109 facilities->pacsize_in = p[1]; 113 facilities->pacsize_in = p[1];
@@ -125,6 +129,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
125 len -= 3; 129 len -= 3;
126 break; 130 break;
127 case X25_FAC_CLASS_C: 131 case X25_FAC_CLASS_C:
132 if (len < 4)
133 return 0;
128 printk(KERN_DEBUG "X.25: unknown facility %02X, " 134 printk(KERN_DEBUG "X.25: unknown facility %02X, "
129 "values %02X, %02X, %02X\n", 135 "values %02X, %02X, %02X\n",
130 p[0], p[1], p[2], p[3]); 136 p[0], p[1], p[2], p[3]);
@@ -132,26 +138,26 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
132 len -= 4; 138 len -= 4;
133 break; 139 break;
134 case X25_FAC_CLASS_D: 140 case X25_FAC_CLASS_D:
141 if (len < p[1] + 2)
142 return 0;
135 switch (*p) { 143 switch (*p) {
136 case X25_FAC_CALLING_AE: 144 case X25_FAC_CALLING_AE:
137 if (p[1] > X25_MAX_DTE_FACIL_LEN) 145 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
138 break; 146 return 0;
139 dte_facs->calling_len = p[2]; 147 dte_facs->calling_len = p[2];
140 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1); 148 memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
141 *vc_fac_mask |= X25_MASK_CALLING_AE; 149 *vc_fac_mask |= X25_MASK_CALLING_AE;
142 break; 150 break;
143 case X25_FAC_CALLED_AE: 151 case X25_FAC_CALLED_AE:
144 if (p[1] > X25_MAX_DTE_FACIL_LEN) 152 if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
145 break; 153 return 0;
146 dte_facs->called_len = p[2]; 154 dte_facs->called_len = p[2];
147 memcpy(dte_facs->called_ae, &p[3], p[1] - 1); 155 memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
148 *vc_fac_mask |= X25_MASK_CALLED_AE; 156 *vc_fac_mask |= X25_MASK_CALLED_AE;
149 break; 157 break;
150 default: 158 default:
151 printk(KERN_DEBUG "X.25: unknown facility %02X," 159 printk(KERN_DEBUG "X.25: unknown facility %02X,"
152 "length %d, values %02X, %02X, " 160 "length %d\n", p[0], p[1]);
153 "%02X, %02X\n",
154 p[0], p[1], p[2], p[3], p[4], p[5]);
155 break; 161 break;
156 } 162 }
157 len -= p[1] + 2; 163 len -= p[1] + 2;
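
The x25_parse_facilities() hunks are a bounds-hardening fix: each facility class now verifies that its fixed-length body (2, 3 or 4 bytes) or variable-length body (p[1] + 2 bytes) fits in the remaining buffer before reading it, and a truncated element aborts the whole parse with 0, which x25_state1_machine() now treats as an error. A compact sketch of the same loop shape over a toy 2-bit class field; the encodings here are illustrative, not the real X25_FAC_CLASS_* values:

#include <stdio.h>

static int parse_facilities(const unsigned char *p, int len)
{
	while (len > 0) {
		switch (p[0] >> 6) {
		case 0:				/* class A: 2 bytes */
			if (len < 2)
				return 0;
			p += 2; len -= 2;
			break;
		case 1:				/* class B: 3 bytes */
			if (len < 3)
				return 0;
			p += 3; len -= 3;
			break;
		default:			/* variable: 2 + p[1] bytes */
			if (len < 2 || len < p[1] + 2)
				return 0;
			p += p[1] + 2; len -= p[1] + 2;
			break;
		}
	}
	return 1;				/* parsed cleanly */
}

int main(void)
{
	const unsigned char ok[]  = { 0x00, 0x01, 0x40, 0x01, 0x02 };
	const unsigned char bad[] = { 0xc0, 0x10 };	/* claims 16-byte body */

	printf("%d %d\n", parse_facilities(ok, sizeof(ok)),
	       parse_facilities(bad, sizeof(bad)));	/* 1 0 */
	return 0;
}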
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 63178961efac..f729f022be69 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -119,6 +119,8 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
119 &x25->vc_facil_mask); 119 &x25->vc_facil_mask);
120 if (len > 0) 120 if (len > 0)
121 skb_pull(skb, len); 121 skb_pull(skb, len);
122 else
123 return -1;
122 /* 124 /*
123 * Copy any Call User Data. 125 * Copy any Call User Data.
124 */ 126 */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cbab6e1a8c9c..044e77898512 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -50,6 +50,9 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); 50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
51static void xfrm_init_pmtu(struct dst_entry *dst); 51static void xfrm_init_pmtu(struct dst_entry *dst);
52static int stale_bundle(struct dst_entry *dst); 52static int stale_bundle(struct dst_entry *dst);
53static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
54 struct flowi *fl, int family, int strict);
55
53 56
54static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 57static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
55 int dir); 58 int dir);
@@ -2276,7 +2279,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
2276 * still valid. 2279 * still valid.
2277 */ 2280 */
2278 2281
2279int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, 2282static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2280 struct flowi *fl, int family, int strict) 2283 struct flowi *fl, int family, int strict)
2281{ 2284{
2282 struct dst_entry *dst = &first->u.dst; 2285 struct dst_entry *dst = &first->u.dst;
@@ -2358,8 +2361,6 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2358 return 1; 2361 return 1;
2359} 2362}
2360 2363
2361EXPORT_SYMBOL(xfrm_bundle_ok);
2362
2363int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) 2364int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2364{ 2365{
2365 struct net *net; 2366 struct net *net;
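
The xfrm_policy.c hunk gives xfrm_bundle_ok() internal linkage now that its last external user is gone: the EXPORT_SYMBOL goes away, the definition becomes static, and a static forward declaration near the top of the file keeps earlier call sites compiling. The shape of that change in miniature; bundle_ok() is a toy stand-in:

#include <stdio.h>

static int bundle_ok(int nstates);	/* forward declaration */

static int check_bundle(int nstates)
{
	return bundle_ok(nstates);	/* call site above the definition */
}

static int bundle_ok(int nstates)
{
	return nstates > 0;
}

int main(void)
{
	printf("%d\n", check_bundle(3));	/* 1 */
	return 0;
}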